diff options
author | Iaroslav Gridin <iaroslav.gridin@tuni.fi> | 2022-12-12 12:08:59 +0000 |
---|---|---|
committer | Iaroslav Gridin <iaroslav.gridin@tuni.fi> | 2022-12-12 12:08:59 +0000 |
commit | 11526a89fd22a85d5665345c939b0c6e26b0a094 (patch) | |
tree | 31e694a835b2bac38649f3001b508b4fe26b72e5 | |
parent | 14d306857d58e5c628b3797c02bad5911f7caabf (diff) | |
download | nss-hg-11526a89fd22a85d5665345c939b0c6e26b0a094.tar.gz |
Bug 1751705 - Update ECCKiila generated files r=nss-reviewers,nkulatova
Differential Revision: https://phabricator.services.mozilla.com/D135765
-rw-r--r-- | lib/freebl/ecl/ecp_secp384r1.c | 1635 | ||||
-rw-r--r-- | lib/freebl/ecl/ecp_secp521r1.c | 1186 |
2 files changed, 1967 insertions, 854 deletions
diff --git a/lib/freebl/ecl/ecp_secp384r1.c b/lib/freebl/ecl/ecp_secp384r1.c index 19da269b4..1e3ca6c39 100644 --- a/lib/freebl/ecl/ecp_secp384r1.c +++ b/lib/freebl/ecl/ecp_secp384r1.c @@ -1,19 +1,19 @@ /* Autogenerated: ECCKiila https://gitlab.com/nisec/ecckiila */ /*- * MIT License - * + * * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -88,14 +88,32 @@ typedef struct { /* return values. */ /* */ /* Computed values: */ -/* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) + (z[4] << 256) + (z[5] << 0x140) */ -/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */ +/* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) + (z[4] << 256) + (z[5] << 0x140) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */ +/* twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) + (z[4] << 256) + (z[5] << 0x140) in */ +/* if x1 & (2^384-1) < 2^383 then x1 & (2^384-1) else (x1 & (2^384-1)) - 2^384 */ #include <stdint.h> typedef unsigned char fiat_secp384r1_uint1; typedef signed char fiat_secp384r1_int1; -typedef signed __int128 fiat_secp384r1_int128; -typedef unsigned __int128 fiat_secp384r1_uint128; +#ifdef __GNUC__ +#define FIAT_SECP384R1_FIAT_EXTENSION __extension__ +#define FIAT_SECP384R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP384R1_FIAT_EXTENSION +#define FIAT_SECP384R1_FIAT_INLINE +#endif + +FIAT_SECP384R1_FIAT_EXTENSION typedef signed __int128 fiat_secp384r1_int128; +FIAT_SECP384R1_FIAT_EXTENSION typedef unsigned __int128 fiat_secp384r1_uint128; + +/* The type fiat_secp384r1_montgomery_domain_field_element is a field element in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ +typedef uint64_t fiat_secp384r1_montgomery_domain_field_element[6]; + +/* The type fiat_secp384r1_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ +typedef uint64_t fiat_secp384r1_non_montgomery_domain_field_element[6]; #if (-1 & 3) != 3 #error "This code only works on a two's complement system" @@ -116,6 +134,7 @@ fiat_secp384r1_value_barrier_u64(uint64_t a) /* * The function fiat_secp384r1_addcarryx_u64 is an addition with carry. + * * Postconditions: * out1 = (arg1 + arg2 + arg3) mod 2^64 * out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋ @@ -146,6 +165,7 @@ fiat_secp384r1_addcarryx_u64(uint64_t *out1, /* * The function fiat_secp384r1_subborrowx_u64 is a subtraction with borrow. + * * Postconditions: * out1 = (-arg1 + arg2 + -arg3) mod 2^64 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋ @@ -176,6 +196,7 @@ fiat_secp384r1_subborrowx_u64(uint64_t *out1, /* * The function fiat_secp384r1_mulx_u64 is a multiplication, returning the full double-width result. + * * Postconditions: * out1 = (arg1 * arg2) mod 2^64 * out2 = ⌊arg1 * arg2 / 2^64⌋ @@ -203,6 +224,7 @@ fiat_secp384r1_mulx_u64(uint64_t *out1, uint64_t *out2, /* * The function fiat_secp384r1_cmovznz_u64 is a single-word conditional move. + * * Postconditions: * out1 = (if arg1 = 0 then arg2 else arg3) * @@ -230,6 +252,7 @@ fiat_secp384r1_cmovznz_u64(uint64_t *out1, /* * The function fiat_secp384r1_mul multiplies two field elements in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -237,15 +260,12 @@ fiat_secp384r1_cmovznz_u64(uint64_t *out1, * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_mul(uint64_t out1[6], const uint64_t arg1[6], - const uint64_t arg2[6]) +fiat_secp384r1_mul( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) { uint64_t x1; uint64_t x2; @@ -987,19 +1007,18 @@ fiat_secp384r1_mul(uint64_t out1[6], const uint64_t arg1[6], /* * The function fiat_secp384r1_square squares a field element in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_square(uint64_t out1[6], const uint64_t arg1[6]) +fiat_secp384r1_square( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) { uint64_t x1; uint64_t x2; @@ -1741,6 +1760,7 @@ fiat_secp384r1_square(uint64_t out1[6], const uint64_t arg1[6]) /* * The function fiat_secp384r1_add adds two field elements in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -1748,15 +1768,12 @@ fiat_secp384r1_square(uint64_t out1[6], const uint64_t arg1[6]) * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_add(uint64_t out1[6], const uint64_t arg1[6], - const uint64_t arg2[6]) +fiat_secp384r1_add( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) { uint64_t x1; fiat_secp384r1_uint1 x2; @@ -1824,6 +1841,7 @@ fiat_secp384r1_add(uint64_t out1[6], const uint64_t arg1[6], /* * The function fiat_secp384r1_sub subtracts two field elements in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -1831,15 +1849,12 @@ fiat_secp384r1_add(uint64_t out1[6], const uint64_t arg1[6], * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_sub(uint64_t out1[6], const uint64_t arg1[6], - const uint64_t arg2[6]) +fiat_secp384r1_sub( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) { uint64_t x1; fiat_secp384r1_uint1 x2; @@ -1892,19 +1907,18 @@ fiat_secp384r1_sub(uint64_t out1[6], const uint64_t arg1[6], /* * The function fiat_secp384r1_opp negates a field element in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_opp(uint64_t out1[6], const uint64_t arg1[6]) +fiat_secp384r1_opp( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) { uint64_t x1; fiat_secp384r1_uint1 x2; @@ -1957,20 +1971,18 @@ fiat_secp384r1_opp(uint64_t out1[6], const uint64_t arg1[6]) /* * The function fiat_secp384r1_from_montgomery translates a field element out of the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^6) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_from_montgomery(uint64_t out1[6], - const uint64_t arg1[6]) +fiat_secp384r1_from_montgomery( + fiat_secp384r1_non_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) { uint64_t x1; uint64_t x2; @@ -2460,20 +2472,18 @@ fiat_secp384r1_from_montgomery(uint64_t out1[6], /* * The function fiat_secp384r1_to_montgomery translates a field element into the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = eval arg1 mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_to_montgomery(uint64_t out1[6], - const uint64_t arg1[6]) +fiat_secp384r1_to_montgomery( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_non_montgomery_domain_field_element arg1) { uint64_t x1; uint64_t x2; @@ -3117,6 +3127,7 @@ fiat_secp384r1_to_montgomery(uint64_t out1[6], /* * The function fiat_secp384r1_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: @@ -3138,6 +3149,7 @@ fiat_secp384r1_nonzero(uint64_t *out1, const uint64_t arg1[6]) /* * The function fiat_secp384r1_selectznz is a multi-limb conditional select. + * * Postconditions: * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) * @@ -3176,6 +3188,7 @@ fiat_secp384r1_selectznz(uint64_t out1[6], /* * The function fiat_secp384r1_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: @@ -3421,6 +3434,7 @@ fiat_secp384r1_to_bytes(uint8_t out1[48], const uint64_t arg1[6]) /* * The function fiat_secp384r1_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order. + * * Preconditions: * 0 ≤ bytes_eval arg1 < m * Postconditions: @@ -3624,75 +3638,390 @@ fiat_secp384r1_from_bytes(uint64_t out1[6], out1[5] = x90; } -/* END verbatim fiat code */ - -/*- - * Finite field inversion via FLT. - * NB: this is not a real Fiat function, just named that way for consistency. - * Autogenerated: ecp/secp384r1/fe_inv.op3 - * custom repunit addition chain +/* + * The function fiat_secp384r1_divstep computes a divstep. + * + * Preconditions: + * 0 ≤ eval arg4 < m + * 0 ≤ eval arg5 < m + * Postconditions: + * out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1) + * twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2) + * twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋) + * eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m) + * eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg4) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m) + * 0 ≤ eval out5 < m + * 0 ≤ eval out5 < m + * 0 ≤ eval out2 < m + * 0 ≤ eval out3 < m + * + * Input Bounds: + * arg1: [0x0 ~> 0xffffffffffffffff] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + * out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */ static void -fiat_secp384r1_inv(fe_t output, const fe_t t1) +fiat_secp384r1_divstep( + uint64_t *out1, uint64_t out2[7], uint64_t out3[7], uint64_t out4[6], + uint64_t out5[6], uint64_t arg1, const uint64_t arg2[7], + const uint64_t arg3[7], const uint64_t arg4[6], const uint64_t arg5[6]) { - int i; - /* temporary variables */ - fe_t acc, t10, t170, t2, t20, t255, t30, t32, t4, t64, t8, t84, t85; - - fiat_secp384r1_square(acc, t1); - fiat_secp384r1_mul(t2, acc, t1); - fiat_secp384r1_square(acc, t2); - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t4, acc, t2); - fiat_secp384r1_square(acc, t4); - for (i = 0; i < 3; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t8, acc, t4); - fiat_secp384r1_square(acc, t8); - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t10, acc, t2); - fiat_secp384r1_square(acc, t10); - for (i = 0; i < 9; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t20, acc, t10); - fiat_secp384r1_square(acc, t20); - for (i = 0; i < 9; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t30, acc, t10); - fiat_secp384r1_square(acc, t30); - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t32, acc, t2); - fiat_secp384r1_square(acc, t32); - for (i = 0; i < 31; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t64, acc, t32); - fiat_secp384r1_square(acc, t64); - for (i = 0; i < 19; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t84, acc, t20); - fiat_secp384r1_square(acc, t84); - fiat_secp384r1_mul(t85, acc, t1); - fiat_secp384r1_square(acc, t85); - for (i = 0; i < 84; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t170, acc, t85); - fiat_secp384r1_square(acc, t170); - for (i = 0; i < 84; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t255, acc, t85); - fiat_secp384r1_square(acc, t255); - for (i = 0; i < 32; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(acc, acc, t32); - for (i = 0; i < 94; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(acc, acc, t30); - for (i = 0; i < 2; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(output, acc, t1); + uint64_t x1; + fiat_secp384r1_uint1 x2; + fiat_secp384r1_uint1 x3; + uint64_t x4; + fiat_secp384r1_uint1 x5; + uint64_t x6; + uint64_t x7; + uint64_t x8; + uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + fiat_secp384r1_uint1 x15; + uint64_t x16; + fiat_secp384r1_uint1 x17; + uint64_t x18; + fiat_secp384r1_uint1 x19; + uint64_t x20; + fiat_secp384r1_uint1 x21; + uint64_t x22; + fiat_secp384r1_uint1 x23; + uint64_t x24; + fiat_secp384r1_uint1 x25; + uint64_t x26; + fiat_secp384r1_uint1 x27; + uint64_t x28; + uint64_t x29; + uint64_t x30; + uint64_t x31; + uint64_t x32; + uint64_t x33; + uint64_t x34; + uint64_t x35; + uint64_t x36; + uint64_t x37; + uint64_t x38; + uint64_t x39; + uint64_t x40; + uint64_t x41; + fiat_secp384r1_uint1 x42; + uint64_t x43; + fiat_secp384r1_uint1 x44; + uint64_t x45; + fiat_secp384r1_uint1 x46; + uint64_t x47; + fiat_secp384r1_uint1 x48; + uint64_t x49; + fiat_secp384r1_uint1 x50; + uint64_t x51; + fiat_secp384r1_uint1 x52; + uint64_t x53; + fiat_secp384r1_uint1 x54; + uint64_t x55; + fiat_secp384r1_uint1 x56; + uint64_t x57; + fiat_secp384r1_uint1 x58; + uint64_t x59; + fiat_secp384r1_uint1 x60; + uint64_t x61; + fiat_secp384r1_uint1 x62; + uint64_t x63; + fiat_secp384r1_uint1 x64; + uint64_t x65; + fiat_secp384r1_uint1 x66; + uint64_t x67; + uint64_t x68; + uint64_t x69; + uint64_t x70; + uint64_t x71; + uint64_t x72; + uint64_t x73; + fiat_secp384r1_uint1 x74; + uint64_t x75; + fiat_secp384r1_uint1 x76; + uint64_t x77; + fiat_secp384r1_uint1 x78; + uint64_t x79; + fiat_secp384r1_uint1 x80; + uint64_t x81; + fiat_secp384r1_uint1 x82; + uint64_t x83; + fiat_secp384r1_uint1 x84; + uint64_t x85; + uint64_t x86; + fiat_secp384r1_uint1 x87; + uint64_t x88; + fiat_secp384r1_uint1 x89; + uint64_t x90; + fiat_secp384r1_uint1 x91; + uint64_t x92; + fiat_secp384r1_uint1 x93; + uint64_t x94; + fiat_secp384r1_uint1 x95; + uint64_t x96; + fiat_secp384r1_uint1 x97; + uint64_t x98; + uint64_t x99; + uint64_t x100; + uint64_t x101; + uint64_t x102; + uint64_t x103; + fiat_secp384r1_uint1 x104; + uint64_t x105; + uint64_t x106; + uint64_t x107; + uint64_t x108; + uint64_t x109; + uint64_t x110; + uint64_t x111; + uint64_t x112; + fiat_secp384r1_uint1 x113; + uint64_t x114; + fiat_secp384r1_uint1 x115; + uint64_t x116; + fiat_secp384r1_uint1 x117; + uint64_t x118; + fiat_secp384r1_uint1 x119; + uint64_t x120; + fiat_secp384r1_uint1 x121; + uint64_t x122; + fiat_secp384r1_uint1 x123; + uint64_t x124; + fiat_secp384r1_uint1 x125; + uint64_t x126; + uint64_t x127; + uint64_t x128; + uint64_t x129; + uint64_t x130; + uint64_t x131; + uint64_t x132; + fiat_secp384r1_uint1 x133; + uint64_t x134; + fiat_secp384r1_uint1 x135; + uint64_t x136; + fiat_secp384r1_uint1 x137; + uint64_t x138; + fiat_secp384r1_uint1 x139; + uint64_t x140; + fiat_secp384r1_uint1 x141; + uint64_t x142; + fiat_secp384r1_uint1 x143; + uint64_t x144; + fiat_secp384r1_uint1 x145; + uint64_t x146; + fiat_secp384r1_uint1 x147; + uint64_t x148; + fiat_secp384r1_uint1 x149; + uint64_t x150; + fiat_secp384r1_uint1 x151; + uint64_t x152; + fiat_secp384r1_uint1 x153; + uint64_t x154; + fiat_secp384r1_uint1 x155; + uint64_t x156; + fiat_secp384r1_uint1 x157; + uint64_t x158; + fiat_secp384r1_uint1 x159; + uint64_t x160; + uint64_t x161; + uint64_t x162; + uint64_t x163; + uint64_t x164; + uint64_t x165; + uint64_t x166; + uint64_t x167; + uint64_t x168; + uint64_t x169; + uint64_t x170; + uint64_t x171; + uint64_t x172; + uint64_t x173; + uint64_t x174; + uint64_t x175; + uint64_t x176; + uint64_t x177; + uint64_t x178; + fiat_secp384r1_addcarryx_u64(&x1, &x2, 0x0, (~arg1), 0x1); + x3 = (fiat_secp384r1_uint1)((fiat_secp384r1_uint1)(x1 >> 63) & + (fiat_secp384r1_uint1)((arg3[0]) & 0x1)); + fiat_secp384r1_addcarryx_u64(&x4, &x5, 0x0, (~arg1), 0x1); + fiat_secp384r1_cmovznz_u64(&x6, x3, arg1, x4); + fiat_secp384r1_cmovznz_u64(&x7, x3, (arg2[0]), (arg3[0])); + fiat_secp384r1_cmovznz_u64(&x8, x3, (arg2[1]), (arg3[1])); + fiat_secp384r1_cmovznz_u64(&x9, x3, (arg2[2]), (arg3[2])); + fiat_secp384r1_cmovznz_u64(&x10, x3, (arg2[3]), (arg3[3])); + fiat_secp384r1_cmovznz_u64(&x11, x3, (arg2[4]), (arg3[4])); + fiat_secp384r1_cmovznz_u64(&x12, x3, (arg2[5]), (arg3[5])); + fiat_secp384r1_cmovznz_u64(&x13, x3, (arg2[6]), (arg3[6])); + fiat_secp384r1_addcarryx_u64(&x14, &x15, 0x0, 0x1, (~(arg2[0]))); + fiat_secp384r1_addcarryx_u64(&x16, &x17, x15, 0x0, (~(arg2[1]))); + fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, 0x0, (~(arg2[2]))); + fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, 0x0, (~(arg2[3]))); + fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, 0x0, (~(arg2[4]))); + fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, 0x0, (~(arg2[5]))); + fiat_secp384r1_addcarryx_u64(&x26, &x27, x25, 0x0, (~(arg2[6]))); + fiat_secp384r1_cmovznz_u64(&x28, x3, (arg3[0]), x14); + fiat_secp384r1_cmovznz_u64(&x29, x3, (arg3[1]), x16); + fiat_secp384r1_cmovznz_u64(&x30, x3, (arg3[2]), x18); + fiat_secp384r1_cmovznz_u64(&x31, x3, (arg3[3]), x20); + fiat_secp384r1_cmovznz_u64(&x32, x3, (arg3[4]), x22); + fiat_secp384r1_cmovznz_u64(&x33, x3, (arg3[5]), x24); + fiat_secp384r1_cmovznz_u64(&x34, x3, (arg3[6]), x26); + fiat_secp384r1_cmovznz_u64(&x35, x3, (arg4[0]), (arg5[0])); + fiat_secp384r1_cmovznz_u64(&x36, x3, (arg4[1]), (arg5[1])); + fiat_secp384r1_cmovznz_u64(&x37, x3, (arg4[2]), (arg5[2])); + fiat_secp384r1_cmovznz_u64(&x38, x3, (arg4[3]), (arg5[3])); + fiat_secp384r1_cmovznz_u64(&x39, x3, (arg4[4]), (arg5[4])); + fiat_secp384r1_cmovznz_u64(&x40, x3, (arg4[5]), (arg5[5])); + fiat_secp384r1_addcarryx_u64(&x41, &x42, 0x0, x35, x35); + fiat_secp384r1_addcarryx_u64(&x43, &x44, x42, x36, x36); + fiat_secp384r1_addcarryx_u64(&x45, &x46, x44, x37, x37); + fiat_secp384r1_addcarryx_u64(&x47, &x48, x46, x38, x38); + fiat_secp384r1_addcarryx_u64(&x49, &x50, x48, x39, x39); + fiat_secp384r1_addcarryx_u64(&x51, &x52, x50, x40, x40); + fiat_secp384r1_subborrowx_u64(&x53, &x54, 0x0, x41, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u64(&x55, &x56, x54, x43, + UINT64_C(0xffffffff00000000)); + fiat_secp384r1_subborrowx_u64(&x57, &x58, x56, x45, + UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_subborrowx_u64(&x59, &x60, x58, x47, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x61, &x62, x60, x49, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x63, &x64, x62, x51, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x65, &x66, x64, x52, 0x0); + x67 = (arg4[5]); + x68 = (arg4[4]); + x69 = (arg4[3]); + x70 = (arg4[2]); + x71 = (arg4[1]); + x72 = (arg4[0]); + fiat_secp384r1_subborrowx_u64(&x73, &x74, 0x0, 0x0, x72); + fiat_secp384r1_subborrowx_u64(&x75, &x76, x74, 0x0, x71); + fiat_secp384r1_subborrowx_u64(&x77, &x78, x76, 0x0, x70); + fiat_secp384r1_subborrowx_u64(&x79, &x80, x78, 0x0, x69); + fiat_secp384r1_subborrowx_u64(&x81, &x82, x80, 0x0, x68); + fiat_secp384r1_subborrowx_u64(&x83, &x84, x82, 0x0, x67); + fiat_secp384r1_cmovznz_u64(&x85, x84, 0x0, UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_addcarryx_u64(&x86, &x87, 0x0, x73, + (x85 & UINT32_C(0xffffffff))); + fiat_secp384r1_addcarryx_u64(&x88, &x89, x87, x75, + (x85 & UINT64_C(0xffffffff00000000))); + fiat_secp384r1_addcarryx_u64(&x90, &x91, x89, x77, + (x85 & UINT64_C(0xfffffffffffffffe))); + fiat_secp384r1_addcarryx_u64(&x92, &x93, x91, x79, x85); + fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x81, x85); + fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x83, x85); + fiat_secp384r1_cmovznz_u64(&x98, x3, (arg5[0]), x86); + fiat_secp384r1_cmovznz_u64(&x99, x3, (arg5[1]), x88); + fiat_secp384r1_cmovznz_u64(&x100, x3, (arg5[2]), x90); + fiat_secp384r1_cmovznz_u64(&x101, x3, (arg5[3]), x92); + fiat_secp384r1_cmovznz_u64(&x102, x3, (arg5[4]), x94); + fiat_secp384r1_cmovznz_u64(&x103, x3, (arg5[5]), x96); + x104 = (fiat_secp384r1_uint1)(x28 & 0x1); + fiat_secp384r1_cmovznz_u64(&x105, x104, 0x0, x7); + fiat_secp384r1_cmovznz_u64(&x106, x104, 0x0, x8); + fiat_secp384r1_cmovznz_u64(&x107, x104, 0x0, x9); + fiat_secp384r1_cmovznz_u64(&x108, x104, 0x0, x10); + fiat_secp384r1_cmovznz_u64(&x109, x104, 0x0, x11); + fiat_secp384r1_cmovznz_u64(&x110, x104, 0x0, x12); + fiat_secp384r1_cmovznz_u64(&x111, x104, 0x0, x13); + fiat_secp384r1_addcarryx_u64(&x112, &x113, 0x0, x28, x105); + fiat_secp384r1_addcarryx_u64(&x114, &x115, x113, x29, x106); + fiat_secp384r1_addcarryx_u64(&x116, &x117, x115, x30, x107); + fiat_secp384r1_addcarryx_u64(&x118, &x119, x117, x31, x108); + fiat_secp384r1_addcarryx_u64(&x120, &x121, x119, x32, x109); + fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x33, x110); + fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x34, x111); + fiat_secp384r1_cmovznz_u64(&x126, x104, 0x0, x35); + fiat_secp384r1_cmovznz_u64(&x127, x104, 0x0, x36); + fiat_secp384r1_cmovznz_u64(&x128, x104, 0x0, x37); + fiat_secp384r1_cmovznz_u64(&x129, x104, 0x0, x38); + fiat_secp384r1_cmovznz_u64(&x130, x104, 0x0, x39); + fiat_secp384r1_cmovznz_u64(&x131, x104, 0x0, x40); + fiat_secp384r1_addcarryx_u64(&x132, &x133, 0x0, x98, x126); + fiat_secp384r1_addcarryx_u64(&x134, &x135, x133, x99, x127); + fiat_secp384r1_addcarryx_u64(&x136, &x137, x135, x100, x128); + fiat_secp384r1_addcarryx_u64(&x138, &x139, x137, x101, x129); + fiat_secp384r1_addcarryx_u64(&x140, &x141, x139, x102, x130); + fiat_secp384r1_addcarryx_u64(&x142, &x143, x141, x103, x131); + fiat_secp384r1_subborrowx_u64(&x144, &x145, 0x0, x132, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u64(&x146, &x147, x145, x134, + UINT64_C(0xffffffff00000000)); + fiat_secp384r1_subborrowx_u64(&x148, &x149, x147, x136, + UINT64_C(0xfffffffffffffffe)); + fiat_secp384r1_subborrowx_u64(&x150, &x151, x149, x138, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x152, &x153, x151, x140, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x154, &x155, x153, x142, + UINT64_C(0xffffffffffffffff)); + fiat_secp384r1_subborrowx_u64(&x156, &x157, x155, x143, 0x0); + fiat_secp384r1_addcarryx_u64(&x158, &x159, 0x0, x6, 0x1); + x160 = ((x112 >> 1) | ((x114 << 63) & UINT64_C(0xffffffffffffffff))); + x161 = ((x114 >> 1) | ((x116 << 63) & UINT64_C(0xffffffffffffffff))); + x162 = ((x116 >> 1) | ((x118 << 63) & UINT64_C(0xffffffffffffffff))); + x163 = ((x118 >> 1) | ((x120 << 63) & UINT64_C(0xffffffffffffffff))); + x164 = ((x120 >> 1) | ((x122 << 63) & UINT64_C(0xffffffffffffffff))); + x165 = ((x122 >> 1) | ((x124 << 63) & UINT64_C(0xffffffffffffffff))); + x166 = ((x124 & UINT64_C(0x8000000000000000)) | (x124 >> 1)); + fiat_secp384r1_cmovznz_u64(&x167, x66, x53, x41); + fiat_secp384r1_cmovznz_u64(&x168, x66, x55, x43); + fiat_secp384r1_cmovznz_u64(&x169, x66, x57, x45); + fiat_secp384r1_cmovznz_u64(&x170, x66, x59, x47); + fiat_secp384r1_cmovznz_u64(&x171, x66, x61, x49); + fiat_secp384r1_cmovznz_u64(&x172, x66, x63, x51); + fiat_secp384r1_cmovznz_u64(&x173, x157, x144, x132); + fiat_secp384r1_cmovznz_u64(&x174, x157, x146, x134); + fiat_secp384r1_cmovznz_u64(&x175, x157, x148, x136); + fiat_secp384r1_cmovznz_u64(&x176, x157, x150, x138); + fiat_secp384r1_cmovznz_u64(&x177, x157, x152, x140); + fiat_secp384r1_cmovznz_u64(&x178, x157, x154, x142); + *out1 = x158; + out2[0] = x7; + out2[1] = x8; + out2[2] = x9; + out2[3] = x10; + out2[4] = x11; + out2[5] = x12; + out2[6] = x13; + out3[0] = x160; + out3[1] = x161; + out3[2] = x162; + out3[3] = x163; + out3[4] = x164; + out3[5] = x165; + out3[6] = x166; + out4[0] = x167; + out4[1] = x168; + out4[2] = x169; + out4[3] = x170; + out4[4] = x171; + out4[5] = x172; + out5[0] = x173; + out5[1] = x174; + out5[2] = x175; + out5[3] = x176; + out5[4] = x177; + out5[5] = x178; } -/* curve coefficient constants */ +/* END verbatim fiat code */ + +/* curve-related constants */ static const limb_t const_one[6] = { UINT64_C(0xFFFFFFFF00000001), UINT64_C(0x00000000FFFFFFFF), @@ -3706,6 +4035,18 @@ static const limb_t const_b[6] = { UINT64_C(0xB62B21F41F022094), UINT64_C(0xCD08114B604FBFF9) }; +static const limb_t const_divstep[6] = { + UINT64_C(0xFFFFC80000005000), UINT64_C(0xFFFFB3FFFFFF83FF), + UINT64_C(0xFFFFF7FFFFFFFFFF), UINT64_C(0xFFFFEBFFFFFFEFFF), + UINT64_C(0x00000BFFFFFFF3FF), UINT64_C(0x0000500000003000) +}; + +static const limb_t const_psat[6] = { + UINT64_C(0x00000000FFFFFFFF), UINT64_C(0xFFFFFFFF00000000), + UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF) +}; + /* LUT for scalar multiplication by comb interleaving */ static const pt_aff_t lut_cmb[21][16] = { { @@ -5769,6 +6110,42 @@ static const pt_aff_t lut_cmb[21][16] = { }; /*- + * Finite field inversion. + * Computed with Bernstein-Yang algorithm. + * https://tches.iacr.org/index.php/TCHES/article/view/8298 + * Based on https://github.com/mit-plv/fiat-crypto/tree/master/inversion/c + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp384r1_inv(fe_t output, const fe_t t1) +{ + int i; + fe_t v1, r1, v2; + limb_t *r2 = output; + limb_t f1[LIMB_CNT + 1], g1[LIMB_CNT + 1], f2[LIMB_CNT + 1], + g2[LIMB_CNT + 1]; + limb_t d2, d1 = 1; + + fe_copy(g1, t1); + g1[LIMB_CNT] = 0; + fe_copy(f1, const_psat); + f1[LIMB_CNT] = 0; + fe_copy(r1, const_one); + fe_set_zero(v1); + + /* 1110 divstep iterations */ + for (i = 0; i < 555; i++) { + fiat_secp384r1_divstep(&d2, f2, g2, v2, r2, d1, f1, g1, v1, r1); + fiat_secp384r1_divstep(&d1, f1, g1, v1, r1, d2, f2, g2, v2, r2); + } + + fiat_secp384r1_opp(output, v1); + fiat_secp384r1_selectznz(output, f1[LIMB_CNT] >> (LIMB_BITS - 1), v1, + output); + fiat_secp384r1_mul(output, output, const_divstep); +} + +/*- * Q := 2P, both projective, Q and P same pointers OK * Autogenerated: op3/dbl_proj.op3 * https://eprint.iacr.org/2015/1060 Alg 6 @@ -6074,7 +6451,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48], int i, d, is_neg, is_inf = 1, flipped = 0; int8_t anaf[385] = { 0 }; int8_t bnaf[385] = { 0 }; - pt_prj_t Q = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); @@ -6136,6 +6513,8 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48], /*- * Variable point scalar multiplication with "regular" wnaf. + * Here "regular" means _no zeroes_, so the sequence of + * EC arithmetic ops is fixed. */ static void var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48], @@ -6143,14 +6522,14 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48], { int i, j, d, diff, is_neg; int8_t rnaf[77] = { 0 }; - pt_prj_t Q = { { 0 } }, lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); scalar_rwnaf(rnaf, scalar); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -6212,8 +6591,8 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48]) { int i, j, k, d, diff, is_neg = 0; int8_t rnaf[77] = { 0 }; - pt_prj_t Q = { { 0 } }, R = { { 0 } }; - pt_aff_t lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } }; + pt_aff_t lut = { { 0 }, { 0 } }; scalar_rwnaf(rnaf, scalar); @@ -6223,7 +6602,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48]) fe_set_zero(Q.Z); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -6277,10 +6656,11 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48]) * Everything is LE byte ordering. */ static void -point_mul_two(unsigned char outx[48], unsigned char outy[48], - const unsigned char a[48], const unsigned char b[48], - const unsigned char inx[48], - const unsigned char iny[48]) +point_mul_two_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char a[48], + const unsigned char b[48], + const unsigned char inx[48], + const unsigned char iny[48]) { pt_aff_t P; @@ -6303,8 +6683,8 @@ point_mul_two(unsigned char outx[48], unsigned char outy[48], * Everything is LE byte ordering. */ static void -point_mul_g(unsigned char outx[48], unsigned char outy[48], - const unsigned char scalar[48]) +point_mul_g_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48]) { pt_aff_t P; @@ -6323,10 +6703,10 @@ point_mul_g(unsigned char outx[48], unsigned char outy[48], * Everything is LE byte ordering. */ static void -point_mul(unsigned char outx[48], unsigned char outy[48], - const unsigned char scalar[48], - const unsigned char inx[48], - const unsigned char iny[48]) +point_mul_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48], + const unsigned char inx[48], + const unsigned char iny[48]) { pt_aff_t P; @@ -6428,8 +6808,8 @@ point_mul(unsigned char outx[48], unsigned char outy[48], } while (0) static mp_err -point_mul_g_secp384r1(const mp_int *n, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_g_secp384r1_wrap(const mp_int *n, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[48]; unsigned char b_y[48]; @@ -6444,7 +6824,7 @@ point_mul_g_secp384r1(const mp_int *n, mp_int *out_x, MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); MP_BE2LE(b_n); - point_mul_g(b_x, b_y, b_n); + point_mul_g_secp384r1(b_x, b_y, b_n); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); @@ -6455,9 +6835,9 @@ CLEANUP: } static mp_err -point_mul_secp384r1(const mp_int *n, const mp_int *in_x, - const mp_int *in_y, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_secp384r1_wrap(const mp_int *n, const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[48]; unsigned char b_y[48]; @@ -6478,7 +6858,7 @@ point_mul_secp384r1(const mp_int *n, const mp_int *in_x, MP_BE2LE(b_x); MP_BE2LE(b_y); MP_BE2LE(b_n); - point_mul(b_x, b_y, b_n, b_x, b_y); + point_mul_secp384r1(b_x, b_y, b_n, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); @@ -6489,10 +6869,11 @@ CLEANUP: } static mp_err -point_mul_two_secp384r1(const mp_int *n1, const mp_int *n2, - const mp_int *in_x, const mp_int *in_y, - mp_int *out_x, mp_int *out_y, - const ECGroup *group) +point_mul_two_secp384r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) { unsigned char b_x[48]; unsigned char b_y[48]; @@ -6502,11 +6883,11 @@ point_mul_two_secp384r1(const mp_int *n1, const mp_int *n2, /* If n2 == NULL or 0, this is just a base-point multiplication. */ if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) - return point_mul_g_secp384r1(n1, out_x, out_y, group); + return point_mul_g_secp384r1_wrap(n1, out_x, out_y, group); /* If n1 == NULL or 0, this is just an arbitary-point multiplication. */ if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) - return point_mul_secp384r1(n2, in_x, in_y, out_x, out_y, group); + return point_mul_secp384r1_wrap(n2, in_x, in_y, out_x, out_y, group); ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, MP_BADARG); @@ -6524,7 +6905,7 @@ point_mul_two_secp384r1(const mp_int *n1, const mp_int *n2, MP_BE2LE(b_y); MP_BE2LE(b_n1); MP_BE2LE(b_n2); - point_mul_two(b_x, b_y, b_n1, b_n2, b_x, b_y); + point_mul_two_secp384r1(b_x, b_y, b_n1, b_n2, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); @@ -6538,9 +6919,9 @@ mp_err ec_group_set_secp384r1(ECGroup *group, ECCurveName name) { if (name == ECCurve_NIST_P384) { - group->base_point_mul = &point_mul_g_secp384r1; - group->point_mul = &point_mul_secp384r1; - group->points_mul = &point_mul_two_secp384r1; + group->base_point_mul = &point_mul_g_secp384r1_wrap; + group->point_mul = &point_mul_secp384r1_wrap; + group->points_mul = &point_mul_two_secp384r1_wrap; } return MP_OKAY; } @@ -6611,12 +6992,27 @@ typedef struct { /* return values. */ /* */ /* Computed values: */ -/* eval z = z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) + (z[8] << 256) + (z[9] << 0x120) + (z[10] << 0x140) + (z[11] << 0x160) */ -/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */ +/* eval z = z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) + (z[8] << 256) + (z[9] << 0x120) + (z[10] << 0x140) + (z[11] << 0x160) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */ +/* twos_complement_eval z = let x1 := z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) + (z[8] << 256) + (z[9] << 0x120) + (z[10] << 0x140) + (z[11] << 0x160) in */ +/* if x1 & (2^384-1) < 2^383 then x1 & (2^384-1) else (x1 & (2^384-1)) - 2^384 */ #include <stdint.h> typedef unsigned char fiat_secp384r1_uint1; typedef signed char fiat_secp384r1_int1; +#ifdef __GNUC__ +#define FIAT_SECP384R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP384R1_FIAT_INLINE +#endif + +/* The type fiat_secp384r1_montgomery_domain_field_element is a field element in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ +typedef uint32_t fiat_secp384r1_montgomery_domain_field_element[12]; + +/* The type fiat_secp384r1_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */ +/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ +typedef uint32_t fiat_secp384r1_non_montgomery_domain_field_element[12]; #if (-1 & 3) != 3 #error "This code only works on a two's complement system" @@ -6637,6 +7033,7 @@ fiat_secp384r1_value_barrier_u32(uint32_t a) /* * The function fiat_secp384r1_addcarryx_u32 is an addition with carry. + * * Postconditions: * out1 = (arg1 + arg2 + arg3) mod 2^32 * out2 = ⌊(arg1 + arg2 + arg3) / 2^32⌋ @@ -6667,6 +7064,7 @@ fiat_secp384r1_addcarryx_u32(uint32_t *out1, /* * The function fiat_secp384r1_subborrowx_u32 is a subtraction with borrow. + * * Postconditions: * out1 = (-arg1 + arg2 + -arg3) mod 2^32 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^32⌋ @@ -6697,6 +7095,7 @@ fiat_secp384r1_subborrowx_u32(uint32_t *out1, /* * The function fiat_secp384r1_mulx_u32 is a multiplication, returning the full double-width result. + * * Postconditions: * out1 = (arg1 * arg2) mod 2^32 * out2 = ⌊arg1 * arg2 / 2^32⌋ @@ -6724,6 +7123,7 @@ fiat_secp384r1_mulx_u32(uint32_t *out1, uint32_t *out2, /* * The function fiat_secp384r1_cmovznz_u32 is a single-word conditional move. + * * Postconditions: * out1 = (if arg1 = 0 then arg2 else arg3) * @@ -6751,6 +7151,7 @@ fiat_secp384r1_cmovznz_u32(uint32_t *out1, /* * The function fiat_secp384r1_mul multiplies two field elements in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -6758,15 +7159,12 @@ fiat_secp384r1_cmovznz_u32(uint32_t *out1, * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_mul(uint32_t out1[12], const uint32_t arg1[12], - const uint32_t arg2[12]) +fiat_secp384r1_mul( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) { uint32_t x1; uint32_t x2; @@ -9324,19 +9722,18 @@ fiat_secp384r1_mul(uint32_t out1[12], const uint32_t arg1[12], /* * The function fiat_secp384r1_square squares a field element in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_square(uint32_t out1[12], const uint32_t arg1[12]) +fiat_secp384r1_square( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) { uint32_t x1; uint32_t x2; @@ -11894,6 +12291,7 @@ fiat_secp384r1_square(uint32_t out1[12], const uint32_t arg1[12]) /* * The function fiat_secp384r1_add adds two field elements in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -11901,15 +12299,12 @@ fiat_secp384r1_square(uint32_t out1[12], const uint32_t arg1[12]) * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_add(uint32_t out1[12], const uint32_t arg1[12], - const uint32_t arg2[12]) +fiat_secp384r1_add( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) { uint32_t x1; fiat_secp384r1_uint1 x2; @@ -12026,6 +12421,7 @@ fiat_secp384r1_add(uint32_t out1[12], const uint32_t arg1[12], /* * The function fiat_secp384r1_sub subtracts two field elements in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * 0 ≤ eval arg2 < m @@ -12033,15 +12429,12 @@ fiat_secp384r1_add(uint32_t out1[12], const uint32_t arg1[12], * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_sub(uint32_t out1[12], const uint32_t arg1[12], - const uint32_t arg2[12]) +fiat_secp384r1_sub( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1, + const fiat_secp384r1_montgomery_domain_field_element arg2) { uint32_t x1; fiat_secp384r1_uint1 x2; @@ -12134,19 +12527,18 @@ fiat_secp384r1_sub(uint32_t out1[12], const uint32_t arg1[12], /* * The function fiat_secp384r1_opp negates a field element in the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_opp(uint32_t out1[12], const uint32_t arg1[12]) +fiat_secp384r1_opp( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) { uint32_t x1; fiat_secp384r1_uint1 x2; @@ -12239,20 +12631,18 @@ fiat_secp384r1_opp(uint32_t out1[12], const uint32_t arg1[12]) /* * The function fiat_secp384r1_from_montgomery translates a field element out of the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval out1 mod m = (eval arg1 * ((2^32)⁻¹ mod m)^12) mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_from_montgomery(uint32_t out1[12], - const uint32_t arg1[12]) +fiat_secp384r1_from_montgomery( + fiat_secp384r1_non_montgomery_domain_field_element out1, + const fiat_secp384r1_montgomery_domain_field_element arg1) { uint32_t x1; uint32_t x2; @@ -13798,20 +14188,18 @@ fiat_secp384r1_from_montgomery(uint32_t out1[12], /* * The function fiat_secp384r1_to_montgomery translates a field element into the Montgomery domain. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: * eval (from_montgomery out1) mod m = eval arg1 mod m * 0 ≤ eval out1 < m * - * Input Bounds: - * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] - * Output Bounds: - * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_to_montgomery(uint32_t out1[12], - const uint32_t arg1[12]) +fiat_secp384r1_to_montgomery( + fiat_secp384r1_montgomery_domain_field_element out1, + const fiat_secp384r1_non_montgomery_domain_field_element arg1) { uint32_t x1; uint32_t x2; @@ -15650,6 +16038,7 @@ fiat_secp384r1_to_montgomery(uint32_t out1[12], /* * The function fiat_secp384r1_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: @@ -15678,6 +16067,7 @@ fiat_secp384r1_nonzero(uint32_t *out1, const uint32_t arg1[12]) /* * The function fiat_secp384r1_selectznz is a multi-limb conditional select. + * * Postconditions: * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) * @@ -15734,6 +16124,7 @@ fiat_secp384r1_selectznz(uint32_t out1[12], /* * The function fiat_secp384r1_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order. + * * Preconditions: * 0 ≤ eval arg1 < m * Postconditions: @@ -15967,6 +16358,7 @@ fiat_secp384r1_to_bytes(uint8_t out1[48], const uint32_t arg1[12]) /* * The function fiat_secp384r1_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order. + * * Preconditions: * 0 ≤ bytes_eval arg1 < m * Postconditions: @@ -16164,75 +16556,684 @@ fiat_secp384r1_from_bytes(uint32_t out1[12], out1[11] = x84; } -/* END verbatim fiat code */ - -/*- - * Finite field inversion via FLT. - * NB: this is not a real Fiat function, just named that way for consistency. - * Autogenerated: ecp/secp384r1/fe_inv.op3 - * custom repunit addition chain +/* + * The function fiat_secp384r1_divstep computes a divstep. + * + * Preconditions: + * 0 ≤ eval arg4 < m + * 0 ≤ eval arg5 < m + * Postconditions: + * out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1) + * twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2) + * twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋) + * eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m) + * eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg4) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m) + * 0 ≤ eval out5 < m + * 0 ≤ eval out5 < m + * 0 ≤ eval out2 < m + * 0 ≤ eval out3 < m + * + * Input Bounds: + * arg1: [0x0 ~> 0xffffffff] + * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * arg4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * arg5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * Output Bounds: + * out1: [0x0 ~> 0xffffffff] + * out2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * out3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * out4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] + * out5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */ static void -fiat_secp384r1_inv(fe_t output, const fe_t t1) +fiat_secp384r1_divstep( + uint32_t *out1, uint32_t out2[13], uint32_t out3[13], uint32_t out4[12], + uint32_t out5[12], uint32_t arg1, const uint32_t arg2[13], + const uint32_t arg3[13], const uint32_t arg4[12], const uint32_t arg5[12]) { - int i; - /* temporary variables */ - fe_t acc, t10, t170, t2, t20, t255, t30, t32, t4, t64, t8, t84, t85; - - fiat_secp384r1_square(acc, t1); - fiat_secp384r1_mul(t2, acc, t1); - fiat_secp384r1_square(acc, t2); - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t4, acc, t2); - fiat_secp384r1_square(acc, t4); - for (i = 0; i < 3; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t8, acc, t4); - fiat_secp384r1_square(acc, t8); - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t10, acc, t2); - fiat_secp384r1_square(acc, t10); - for (i = 0; i < 9; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t20, acc, t10); - fiat_secp384r1_square(acc, t20); - for (i = 0; i < 9; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t30, acc, t10); - fiat_secp384r1_square(acc, t30); - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t32, acc, t2); - fiat_secp384r1_square(acc, t32); - for (i = 0; i < 31; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t64, acc, t32); - fiat_secp384r1_square(acc, t64); - for (i = 0; i < 19; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t84, acc, t20); - fiat_secp384r1_square(acc, t84); - fiat_secp384r1_mul(t85, acc, t1); - fiat_secp384r1_square(acc, t85); - for (i = 0; i < 84; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t170, acc, t85); - fiat_secp384r1_square(acc, t170); - for (i = 0; i < 84; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(t255, acc, t85); - fiat_secp384r1_square(acc, t255); - for (i = 0; i < 32; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(acc, acc, t32); - for (i = 0; i < 94; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(acc, acc, t30); - for (i = 0; i < 2; i++) - fiat_secp384r1_square(acc, acc); - fiat_secp384r1_mul(output, acc, t1); + uint32_t x1; + fiat_secp384r1_uint1 x2; + fiat_secp384r1_uint1 x3; + uint32_t x4; + fiat_secp384r1_uint1 x5; + uint32_t x6; + uint32_t x7; + uint32_t x8; + uint32_t x9; + uint32_t x10; + uint32_t x11; + uint32_t x12; + uint32_t x13; + uint32_t x14; + uint32_t x15; + uint32_t x16; + uint32_t x17; + uint32_t x18; + uint32_t x19; + uint32_t x20; + fiat_secp384r1_uint1 x21; + uint32_t x22; + fiat_secp384r1_uint1 x23; + uint32_t x24; + fiat_secp384r1_uint1 x25; + uint32_t x26; + fiat_secp384r1_uint1 x27; + uint32_t x28; + fiat_secp384r1_uint1 x29; + uint32_t x30; + fiat_secp384r1_uint1 x31; + uint32_t x32; + fiat_secp384r1_uint1 x33; + uint32_t x34; + fiat_secp384r1_uint1 x35; + uint32_t x36; + fiat_secp384r1_uint1 x37; + uint32_t x38; + fiat_secp384r1_uint1 x39; + uint32_t x40; + fiat_secp384r1_uint1 x41; + uint32_t x42; + fiat_secp384r1_uint1 x43; + uint32_t x44; + fiat_secp384r1_uint1 x45; + uint32_t x46; + uint32_t x47; + uint32_t x48; + uint32_t x49; + uint32_t x50; + uint32_t x51; + uint32_t x52; + uint32_t x53; + uint32_t x54; + uint32_t x55; + uint32_t x56; + uint32_t x57; + uint32_t x58; + uint32_t x59; + uint32_t x60; + uint32_t x61; + uint32_t x62; + uint32_t x63; + uint32_t x64; + uint32_t x65; + uint32_t x66; + uint32_t x67; + uint32_t x68; + uint32_t x69; + uint32_t x70; + uint32_t x71; + fiat_secp384r1_uint1 x72; + uint32_t x73; + fiat_secp384r1_uint1 x74; + uint32_t x75; + fiat_secp384r1_uint1 x76; + uint32_t x77; + fiat_secp384r1_uint1 x78; + uint32_t x79; + fiat_secp384r1_uint1 x80; + uint32_t x81; + fiat_secp384r1_uint1 x82; + uint32_t x83; + fiat_secp384r1_uint1 x84; + uint32_t x85; + fiat_secp384r1_uint1 x86; + uint32_t x87; + fiat_secp384r1_uint1 x88; + uint32_t x89; + fiat_secp384r1_uint1 x90; + uint32_t x91; + fiat_secp384r1_uint1 x92; + uint32_t x93; + fiat_secp384r1_uint1 x94; + uint32_t x95; + fiat_secp384r1_uint1 x96; + uint32_t x97; + fiat_secp384r1_uint1 x98; + uint32_t x99; + fiat_secp384r1_uint1 x100; + uint32_t x101; + fiat_secp384r1_uint1 x102; + uint32_t x103; + fiat_secp384r1_uint1 x104; + uint32_t x105; + fiat_secp384r1_uint1 x106; + uint32_t x107; + fiat_secp384r1_uint1 x108; + uint32_t x109; + fiat_secp384r1_uint1 x110; + uint32_t x111; + fiat_secp384r1_uint1 x112; + uint32_t x113; + fiat_secp384r1_uint1 x114; + uint32_t x115; + fiat_secp384r1_uint1 x116; + uint32_t x117; + fiat_secp384r1_uint1 x118; + uint32_t x119; + fiat_secp384r1_uint1 x120; + uint32_t x121; + uint32_t x122; + uint32_t x123; + uint32_t x124; + uint32_t x125; + uint32_t x126; + uint32_t x127; + uint32_t x128; + uint32_t x129; + uint32_t x130; + uint32_t x131; + uint32_t x132; + uint32_t x133; + fiat_secp384r1_uint1 x134; + uint32_t x135; + fiat_secp384r1_uint1 x136; + uint32_t x137; + fiat_secp384r1_uint1 x138; + uint32_t x139; + fiat_secp384r1_uint1 x140; + uint32_t x141; + fiat_secp384r1_uint1 x142; + uint32_t x143; + fiat_secp384r1_uint1 x144; + uint32_t x145; + fiat_secp384r1_uint1 x146; + uint32_t x147; + fiat_secp384r1_uint1 x148; + uint32_t x149; + fiat_secp384r1_uint1 x150; + uint32_t x151; + fiat_secp384r1_uint1 x152; + uint32_t x153; + fiat_secp384r1_uint1 x154; + uint32_t x155; + fiat_secp384r1_uint1 x156; + uint32_t x157; + uint32_t x158; + fiat_secp384r1_uint1 x159; + uint32_t x160; + fiat_secp384r1_uint1 x161; + uint32_t x162; + fiat_secp384r1_uint1 x163; + uint32_t x164; + fiat_secp384r1_uint1 x165; + uint32_t x166; + fiat_secp384r1_uint1 x167; + uint32_t x168; + fiat_secp384r1_uint1 x169; + uint32_t x170; + fiat_secp384r1_uint1 x171; + uint32_t x172; + fiat_secp384r1_uint1 x173; + uint32_t x174; + fiat_secp384r1_uint1 x175; + uint32_t x176; + fiat_secp384r1_uint1 x177; + uint32_t x178; + fiat_secp384r1_uint1 x179; + uint32_t x180; + fiat_secp384r1_uint1 x181; + uint32_t x182; + uint32_t x183; + uint32_t x184; + uint32_t x185; + uint32_t x186; + uint32_t x187; + uint32_t x188; + uint32_t x189; + uint32_t x190; + uint32_t x191; + uint32_t x192; + uint32_t x193; + fiat_secp384r1_uint1 x194; + uint32_t x195; + uint32_t x196; + uint32_t x197; + uint32_t x198; + uint32_t x199; + uint32_t x200; + uint32_t x201; + uint32_t x202; + uint32_t x203; + uint32_t x204; + uint32_t x205; + uint32_t x206; + uint32_t x207; + uint32_t x208; + fiat_secp384r1_uint1 x209; + uint32_t x210; + fiat_secp384r1_uint1 x211; + uint32_t x212; + fiat_secp384r1_uint1 x213; + uint32_t x214; + fiat_secp384r1_uint1 x215; + uint32_t x216; + fiat_secp384r1_uint1 x217; + uint32_t x218; + fiat_secp384r1_uint1 x219; + uint32_t x220; + fiat_secp384r1_uint1 x221; + uint32_t x222; + fiat_secp384r1_uint1 x223; + uint32_t x224; + fiat_secp384r1_uint1 x225; + uint32_t x226; + fiat_secp384r1_uint1 x227; + uint32_t x228; + fiat_secp384r1_uint1 x229; + uint32_t x230; + fiat_secp384r1_uint1 x231; + uint32_t x232; + fiat_secp384r1_uint1 x233; + uint32_t x234; + uint32_t x235; + uint32_t x236; + uint32_t x237; + uint32_t x238; + uint32_t x239; + uint32_t x240; + uint32_t x241; + uint32_t x242; + uint32_t x243; + uint32_t x244; + uint32_t x245; + uint32_t x246; + fiat_secp384r1_uint1 x247; + uint32_t x248; + fiat_secp384r1_uint1 x249; + uint32_t x250; + fiat_secp384r1_uint1 x251; + uint32_t x252; + fiat_secp384r1_uint1 x253; + uint32_t x254; + fiat_secp384r1_uint1 x255; + uint32_t x256; + fiat_secp384r1_uint1 x257; + uint32_t x258; + fiat_secp384r1_uint1 x259; + uint32_t x260; + fiat_secp384r1_uint1 x261; + uint32_t x262; + fiat_secp384r1_uint1 x263; + uint32_t x264; + fiat_secp384r1_uint1 x265; + uint32_t x266; + fiat_secp384r1_uint1 x267; + uint32_t x268; + fiat_secp384r1_uint1 x269; + uint32_t x270; + fiat_secp384r1_uint1 x271; + uint32_t x272; + fiat_secp384r1_uint1 x273; + uint32_t x274; + fiat_secp384r1_uint1 x275; + uint32_t x276; + fiat_secp384r1_uint1 x277; + uint32_t x278; + fiat_secp384r1_uint1 x279; + uint32_t x280; + fiat_secp384r1_uint1 x281; + uint32_t x282; + fiat_secp384r1_uint1 x283; + uint32_t x284; + fiat_secp384r1_uint1 x285; + uint32_t x286; + fiat_secp384r1_uint1 x287; + uint32_t x288; + fiat_secp384r1_uint1 x289; + uint32_t x290; + fiat_secp384r1_uint1 x291; + uint32_t x292; + fiat_secp384r1_uint1 x293; + uint32_t x294; + fiat_secp384r1_uint1 x295; + uint32_t x296; + fiat_secp384r1_uint1 x297; + uint32_t x298; + uint32_t x299; + uint32_t x300; + uint32_t x301; + uint32_t x302; + uint32_t x303; + uint32_t x304; + uint32_t x305; + uint32_t x306; + uint32_t x307; + uint32_t x308; + uint32_t x309; + uint32_t x310; + uint32_t x311; + uint32_t x312; + uint32_t x313; + uint32_t x314; + uint32_t x315; + uint32_t x316; + uint32_t x317; + uint32_t x318; + uint32_t x319; + uint32_t x320; + uint32_t x321; + uint32_t x322; + uint32_t x323; + uint32_t x324; + uint32_t x325; + uint32_t x326; + uint32_t x327; + uint32_t x328; + uint32_t x329; + uint32_t x330; + uint32_t x331; + uint32_t x332; + uint32_t x333; + uint32_t x334; + fiat_secp384r1_addcarryx_u32(&x1, &x2, 0x0, (~arg1), 0x1); + x3 = (fiat_secp384r1_uint1)((fiat_secp384r1_uint1)(x1 >> 31) & + (fiat_secp384r1_uint1)((arg3[0]) & 0x1)); + fiat_secp384r1_addcarryx_u32(&x4, &x5, 0x0, (~arg1), 0x1); + fiat_secp384r1_cmovznz_u32(&x6, x3, arg1, x4); + fiat_secp384r1_cmovznz_u32(&x7, x3, (arg2[0]), (arg3[0])); + fiat_secp384r1_cmovznz_u32(&x8, x3, (arg2[1]), (arg3[1])); + fiat_secp384r1_cmovznz_u32(&x9, x3, (arg2[2]), (arg3[2])); + fiat_secp384r1_cmovznz_u32(&x10, x3, (arg2[3]), (arg3[3])); + fiat_secp384r1_cmovznz_u32(&x11, x3, (arg2[4]), (arg3[4])); + fiat_secp384r1_cmovznz_u32(&x12, x3, (arg2[5]), (arg3[5])); + fiat_secp384r1_cmovznz_u32(&x13, x3, (arg2[6]), (arg3[6])); + fiat_secp384r1_cmovznz_u32(&x14, x3, (arg2[7]), (arg3[7])); + fiat_secp384r1_cmovznz_u32(&x15, x3, (arg2[8]), (arg3[8])); + fiat_secp384r1_cmovznz_u32(&x16, x3, (arg2[9]), (arg3[9])); + fiat_secp384r1_cmovznz_u32(&x17, x3, (arg2[10]), (arg3[10])); + fiat_secp384r1_cmovznz_u32(&x18, x3, (arg2[11]), (arg3[11])); + fiat_secp384r1_cmovznz_u32(&x19, x3, (arg2[12]), (arg3[12])); + fiat_secp384r1_addcarryx_u32(&x20, &x21, 0x0, 0x1, (~(arg2[0]))); + fiat_secp384r1_addcarryx_u32(&x22, &x23, x21, 0x0, (~(arg2[1]))); + fiat_secp384r1_addcarryx_u32(&x24, &x25, x23, 0x0, (~(arg2[2]))); + fiat_secp384r1_addcarryx_u32(&x26, &x27, x25, 0x0, (~(arg2[3]))); + fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, 0x0, (~(arg2[4]))); + fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, 0x0, (~(arg2[5]))); + fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, 0x0, (~(arg2[6]))); + fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, 0x0, (~(arg2[7]))); + fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, 0x0, (~(arg2[8]))); + fiat_secp384r1_addcarryx_u32(&x38, &x39, x37, 0x0, (~(arg2[9]))); + fiat_secp384r1_addcarryx_u32(&x40, &x41, x39, 0x0, (~(arg2[10]))); + fiat_secp384r1_addcarryx_u32(&x42, &x43, x41, 0x0, (~(arg2[11]))); + fiat_secp384r1_addcarryx_u32(&x44, &x45, x43, 0x0, (~(arg2[12]))); + fiat_secp384r1_cmovznz_u32(&x46, x3, (arg3[0]), x20); + fiat_secp384r1_cmovznz_u32(&x47, x3, (arg3[1]), x22); + fiat_secp384r1_cmovznz_u32(&x48, x3, (arg3[2]), x24); + fiat_secp384r1_cmovznz_u32(&x49, x3, (arg3[3]), x26); + fiat_secp384r1_cmovznz_u32(&x50, x3, (arg3[4]), x28); + fiat_secp384r1_cmovznz_u32(&x51, x3, (arg3[5]), x30); + fiat_secp384r1_cmovznz_u32(&x52, x3, (arg3[6]), x32); + fiat_secp384r1_cmovznz_u32(&x53, x3, (arg3[7]), x34); + fiat_secp384r1_cmovznz_u32(&x54, x3, (arg3[8]), x36); + fiat_secp384r1_cmovznz_u32(&x55, x3, (arg3[9]), x38); + fiat_secp384r1_cmovznz_u32(&x56, x3, (arg3[10]), x40); + fiat_secp384r1_cmovznz_u32(&x57, x3, (arg3[11]), x42); + fiat_secp384r1_cmovznz_u32(&x58, x3, (arg3[12]), x44); + fiat_secp384r1_cmovznz_u32(&x59, x3, (arg4[0]), (arg5[0])); + fiat_secp384r1_cmovznz_u32(&x60, x3, (arg4[1]), (arg5[1])); + fiat_secp384r1_cmovznz_u32(&x61, x3, (arg4[2]), (arg5[2])); + fiat_secp384r1_cmovznz_u32(&x62, x3, (arg4[3]), (arg5[3])); + fiat_secp384r1_cmovznz_u32(&x63, x3, (arg4[4]), (arg5[4])); + fiat_secp384r1_cmovznz_u32(&x64, x3, (arg4[5]), (arg5[5])); + fiat_secp384r1_cmovznz_u32(&x65, x3, (arg4[6]), (arg5[6])); + fiat_secp384r1_cmovznz_u32(&x66, x3, (arg4[7]), (arg5[7])); + fiat_secp384r1_cmovznz_u32(&x67, x3, (arg4[8]), (arg5[8])); + fiat_secp384r1_cmovznz_u32(&x68, x3, (arg4[9]), (arg5[9])); + fiat_secp384r1_cmovznz_u32(&x69, x3, (arg4[10]), (arg5[10])); + fiat_secp384r1_cmovznz_u32(&x70, x3, (arg4[11]), (arg5[11])); + fiat_secp384r1_addcarryx_u32(&x71, &x72, 0x0, x59, x59); + fiat_secp384r1_addcarryx_u32(&x73, &x74, x72, x60, x60); + fiat_secp384r1_addcarryx_u32(&x75, &x76, x74, x61, x61); + fiat_secp384r1_addcarryx_u32(&x77, &x78, x76, x62, x62); + fiat_secp384r1_addcarryx_u32(&x79, &x80, x78, x63, x63); + fiat_secp384r1_addcarryx_u32(&x81, &x82, x80, x64, x64); + fiat_secp384r1_addcarryx_u32(&x83, &x84, x82, x65, x65); + fiat_secp384r1_addcarryx_u32(&x85, &x86, x84, x66, x66); + fiat_secp384r1_addcarryx_u32(&x87, &x88, x86, x67, x67); + fiat_secp384r1_addcarryx_u32(&x89, &x90, x88, x68, x68); + fiat_secp384r1_addcarryx_u32(&x91, &x92, x90, x69, x69); + fiat_secp384r1_addcarryx_u32(&x93, &x94, x92, x70, x70); + fiat_secp384r1_subborrowx_u32(&x95, &x96, 0x0, x71, UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x97, &x98, x96, x73, 0x0); + fiat_secp384r1_subborrowx_u32(&x99, &x100, x98, x75, 0x0); + fiat_secp384r1_subborrowx_u32(&x101, &x102, x100, x77, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x103, &x104, x102, x79, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x105, &x106, x104, x81, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x107, &x108, x106, x83, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x109, &x110, x108, x85, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x111, &x112, x110, x87, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x113, &x114, x112, x89, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x115, &x116, x114, x91, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x117, &x118, x116, x93, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x119, &x120, x118, x94, 0x0); + x121 = (arg4[11]); + x122 = (arg4[10]); + x123 = (arg4[9]); + x124 = (arg4[8]); + x125 = (arg4[7]); + x126 = (arg4[6]); + x127 = (arg4[5]); + x128 = (arg4[4]); + x129 = (arg4[3]); + x130 = (arg4[2]); + x131 = (arg4[1]); + x132 = (arg4[0]); + fiat_secp384r1_subborrowx_u32(&x133, &x134, 0x0, 0x0, x132); + fiat_secp384r1_subborrowx_u32(&x135, &x136, x134, 0x0, x131); + fiat_secp384r1_subborrowx_u32(&x137, &x138, x136, 0x0, x130); + fiat_secp384r1_subborrowx_u32(&x139, &x140, x138, 0x0, x129); + fiat_secp384r1_subborrowx_u32(&x141, &x142, x140, 0x0, x128); + fiat_secp384r1_subborrowx_u32(&x143, &x144, x142, 0x0, x127); + fiat_secp384r1_subborrowx_u32(&x145, &x146, x144, 0x0, x126); + fiat_secp384r1_subborrowx_u32(&x147, &x148, x146, 0x0, x125); + fiat_secp384r1_subborrowx_u32(&x149, &x150, x148, 0x0, x124); + fiat_secp384r1_subborrowx_u32(&x151, &x152, x150, 0x0, x123); + fiat_secp384r1_subborrowx_u32(&x153, &x154, x152, 0x0, x122); + fiat_secp384r1_subborrowx_u32(&x155, &x156, x154, 0x0, x121); + fiat_secp384r1_cmovznz_u32(&x157, x156, 0x0, UINT32_C(0xffffffff)); + fiat_secp384r1_addcarryx_u32(&x158, &x159, 0x0, x133, x157); + fiat_secp384r1_addcarryx_u32(&x160, &x161, x159, x135, 0x0); + fiat_secp384r1_addcarryx_u32(&x162, &x163, x161, x137, 0x0); + fiat_secp384r1_addcarryx_u32(&x164, &x165, x163, x139, x157); + fiat_secp384r1_addcarryx_u32(&x166, &x167, x165, x141, + (x157 & UINT32_C(0xfffffffe))); + fiat_secp384r1_addcarryx_u32(&x168, &x169, x167, x143, x157); + fiat_secp384r1_addcarryx_u32(&x170, &x171, x169, x145, x157); + fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x147, x157); + fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x149, x157); + fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x151, x157); + fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x153, x157); + fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x155, x157); + fiat_secp384r1_cmovznz_u32(&x182, x3, (arg5[0]), x158); + fiat_secp384r1_cmovznz_u32(&x183, x3, (arg5[1]), x160); + fiat_secp384r1_cmovznz_u32(&x184, x3, (arg5[2]), x162); + fiat_secp384r1_cmovznz_u32(&x185, x3, (arg5[3]), x164); + fiat_secp384r1_cmovznz_u32(&x186, x3, (arg5[4]), x166); + fiat_secp384r1_cmovznz_u32(&x187, x3, (arg5[5]), x168); + fiat_secp384r1_cmovznz_u32(&x188, x3, (arg5[6]), x170); + fiat_secp384r1_cmovznz_u32(&x189, x3, (arg5[7]), x172); + fiat_secp384r1_cmovznz_u32(&x190, x3, (arg5[8]), x174); + fiat_secp384r1_cmovznz_u32(&x191, x3, (arg5[9]), x176); + fiat_secp384r1_cmovznz_u32(&x192, x3, (arg5[10]), x178); + fiat_secp384r1_cmovznz_u32(&x193, x3, (arg5[11]), x180); + x194 = (fiat_secp384r1_uint1)(x46 & 0x1); + fiat_secp384r1_cmovznz_u32(&x195, x194, 0x0, x7); + fiat_secp384r1_cmovznz_u32(&x196, x194, 0x0, x8); + fiat_secp384r1_cmovznz_u32(&x197, x194, 0x0, x9); + fiat_secp384r1_cmovznz_u32(&x198, x194, 0x0, x10); + fiat_secp384r1_cmovznz_u32(&x199, x194, 0x0, x11); + fiat_secp384r1_cmovznz_u32(&x200, x194, 0x0, x12); + fiat_secp384r1_cmovznz_u32(&x201, x194, 0x0, x13); + fiat_secp384r1_cmovznz_u32(&x202, x194, 0x0, x14); + fiat_secp384r1_cmovznz_u32(&x203, x194, 0x0, x15); + fiat_secp384r1_cmovznz_u32(&x204, x194, 0x0, x16); + fiat_secp384r1_cmovznz_u32(&x205, x194, 0x0, x17); + fiat_secp384r1_cmovznz_u32(&x206, x194, 0x0, x18); + fiat_secp384r1_cmovznz_u32(&x207, x194, 0x0, x19); + fiat_secp384r1_addcarryx_u32(&x208, &x209, 0x0, x46, x195); + fiat_secp384r1_addcarryx_u32(&x210, &x211, x209, x47, x196); + fiat_secp384r1_addcarryx_u32(&x212, &x213, x211, x48, x197); + fiat_secp384r1_addcarryx_u32(&x214, &x215, x213, x49, x198); + fiat_secp384r1_addcarryx_u32(&x216, &x217, x215, x50, x199); + fiat_secp384r1_addcarryx_u32(&x218, &x219, x217, x51, x200); + fiat_secp384r1_addcarryx_u32(&x220, &x221, x219, x52, x201); + fiat_secp384r1_addcarryx_u32(&x222, &x223, x221, x53, x202); + fiat_secp384r1_addcarryx_u32(&x224, &x225, x223, x54, x203); + fiat_secp384r1_addcarryx_u32(&x226, &x227, x225, x55, x204); + fiat_secp384r1_addcarryx_u32(&x228, &x229, x227, x56, x205); + fiat_secp384r1_addcarryx_u32(&x230, &x231, x229, x57, x206); + fiat_secp384r1_addcarryx_u32(&x232, &x233, x231, x58, x207); + fiat_secp384r1_cmovznz_u32(&x234, x194, 0x0, x59); + fiat_secp384r1_cmovznz_u32(&x235, x194, 0x0, x60); + fiat_secp384r1_cmovznz_u32(&x236, x194, 0x0, x61); + fiat_secp384r1_cmovznz_u32(&x237, x194, 0x0, x62); + fiat_secp384r1_cmovznz_u32(&x238, x194, 0x0, x63); + fiat_secp384r1_cmovznz_u32(&x239, x194, 0x0, x64); + fiat_secp384r1_cmovznz_u32(&x240, x194, 0x0, x65); + fiat_secp384r1_cmovznz_u32(&x241, x194, 0x0, x66); + fiat_secp384r1_cmovznz_u32(&x242, x194, 0x0, x67); + fiat_secp384r1_cmovznz_u32(&x243, x194, 0x0, x68); + fiat_secp384r1_cmovznz_u32(&x244, x194, 0x0, x69); + fiat_secp384r1_cmovznz_u32(&x245, x194, 0x0, x70); + fiat_secp384r1_addcarryx_u32(&x246, &x247, 0x0, x182, x234); + fiat_secp384r1_addcarryx_u32(&x248, &x249, x247, x183, x235); + fiat_secp384r1_addcarryx_u32(&x250, &x251, x249, x184, x236); + fiat_secp384r1_addcarryx_u32(&x252, &x253, x251, x185, x237); + fiat_secp384r1_addcarryx_u32(&x254, &x255, x253, x186, x238); + fiat_secp384r1_addcarryx_u32(&x256, &x257, x255, x187, x239); + fiat_secp384r1_addcarryx_u32(&x258, &x259, x257, x188, x240); + fiat_secp384r1_addcarryx_u32(&x260, &x261, x259, x189, x241); + fiat_secp384r1_addcarryx_u32(&x262, &x263, x261, x190, x242); + fiat_secp384r1_addcarryx_u32(&x264, &x265, x263, x191, x243); + fiat_secp384r1_addcarryx_u32(&x266, &x267, x265, x192, x244); + fiat_secp384r1_addcarryx_u32(&x268, &x269, x267, x193, x245); + fiat_secp384r1_subborrowx_u32(&x270, &x271, 0x0, x246, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x272, &x273, x271, x248, 0x0); + fiat_secp384r1_subborrowx_u32(&x274, &x275, x273, x250, 0x0); + fiat_secp384r1_subborrowx_u32(&x276, &x277, x275, x252, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x278, &x279, x277, x254, + UINT32_C(0xfffffffe)); + fiat_secp384r1_subborrowx_u32(&x280, &x281, x279, x256, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x282, &x283, x281, x258, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x284, &x285, x283, x260, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x286, &x287, x285, x262, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x288, &x289, x287, x264, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x290, &x291, x289, x266, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x292, &x293, x291, x268, + UINT32_C(0xffffffff)); + fiat_secp384r1_subborrowx_u32(&x294, &x295, x293, x269, 0x0); + fiat_secp384r1_addcarryx_u32(&x296, &x297, 0x0, x6, 0x1); + x298 = ((x208 >> 1) | ((x210 << 31) & UINT32_C(0xffffffff))); + x299 = ((x210 >> 1) | ((x212 << 31) & UINT32_C(0xffffffff))); + x300 = ((x212 >> 1) | ((x214 << 31) & UINT32_C(0xffffffff))); + x301 = ((x214 >> 1) | ((x216 << 31) & UINT32_C(0xffffffff))); + x302 = ((x216 >> 1) | ((x218 << 31) & UINT32_C(0xffffffff))); + x303 = ((x218 >> 1) | ((x220 << 31) & UINT32_C(0xffffffff))); + x304 = ((x220 >> 1) | ((x222 << 31) & UINT32_C(0xffffffff))); + x305 = ((x222 >> 1) | ((x224 << 31) & UINT32_C(0xffffffff))); + x306 = ((x224 >> 1) | ((x226 << 31) & UINT32_C(0xffffffff))); + x307 = ((x226 >> 1) | ((x228 << 31) & UINT32_C(0xffffffff))); + x308 = ((x228 >> 1) | ((x230 << 31) & UINT32_C(0xffffffff))); + x309 = ((x230 >> 1) | ((x232 << 31) & UINT32_C(0xffffffff))); + x310 = ((x232 & UINT32_C(0x80000000)) | (x232 >> 1)); + fiat_secp384r1_cmovznz_u32(&x311, x120, x95, x71); + fiat_secp384r1_cmovznz_u32(&x312, x120, x97, x73); + fiat_secp384r1_cmovznz_u32(&x313, x120, x99, x75); + fiat_secp384r1_cmovznz_u32(&x314, x120, x101, x77); + fiat_secp384r1_cmovznz_u32(&x315, x120, x103, x79); + fiat_secp384r1_cmovznz_u32(&x316, x120, x105, x81); + fiat_secp384r1_cmovznz_u32(&x317, x120, x107, x83); + fiat_secp384r1_cmovznz_u32(&x318, x120, x109, x85); + fiat_secp384r1_cmovznz_u32(&x319, x120, x111, x87); + fiat_secp384r1_cmovznz_u32(&x320, x120, x113, x89); + fiat_secp384r1_cmovznz_u32(&x321, x120, x115, x91); + fiat_secp384r1_cmovznz_u32(&x322, x120, x117, x93); + fiat_secp384r1_cmovznz_u32(&x323, x295, x270, x246); + fiat_secp384r1_cmovznz_u32(&x324, x295, x272, x248); + fiat_secp384r1_cmovznz_u32(&x325, x295, x274, x250); + fiat_secp384r1_cmovznz_u32(&x326, x295, x276, x252); + fiat_secp384r1_cmovznz_u32(&x327, x295, x278, x254); + fiat_secp384r1_cmovznz_u32(&x328, x295, x280, x256); + fiat_secp384r1_cmovznz_u32(&x329, x295, x282, x258); + fiat_secp384r1_cmovznz_u32(&x330, x295, x284, x260); + fiat_secp384r1_cmovznz_u32(&x331, x295, x286, x262); + fiat_secp384r1_cmovznz_u32(&x332, x295, x288, x264); + fiat_secp384r1_cmovznz_u32(&x333, x295, x290, x266); + fiat_secp384r1_cmovznz_u32(&x334, x295, x292, x268); + *out1 = x296; + out2[0] = x7; + out2[1] = x8; + out2[2] = x9; + out2[3] = x10; + out2[4] = x11; + out2[5] = x12; + out2[6] = x13; + out2[7] = x14; + out2[8] = x15; + out2[9] = x16; + out2[10] = x17; + out2[11] = x18; + out2[12] = x19; + out3[0] = x298; + out3[1] = x299; + out3[2] = x300; + out3[3] = x301; + out3[4] = x302; + out3[5] = x303; + out3[6] = x304; + out3[7] = x305; + out3[8] = x306; + out3[9] = x307; + out3[10] = x308; + out3[11] = x309; + out3[12] = x310; + out4[0] = x311; + out4[1] = x312; + out4[2] = x313; + out4[3] = x314; + out4[4] = x315; + out4[5] = x316; + out4[6] = x317; + out4[7] = x318; + out4[8] = x319; + out4[9] = x320; + out4[10] = x321; + out4[11] = x322; + out5[0] = x323; + out5[1] = x324; + out5[2] = x325; + out5[3] = x326; + out5[4] = x327; + out5[5] = x328; + out5[6] = x329; + out5[7] = x330; + out5[8] = x331; + out5[9] = x332; + out5[10] = x333; + out5[11] = x334; } -/* curve coefficient constants */ +/* END verbatim fiat code */ + +/* curve-related constants */ static const limb_t const_one[12] = { UINT32_C(0x00000001), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), @@ -16248,6 +17249,20 @@ static const limb_t const_b[12] = { UINT32_C(0xB62B21F4), UINT32_C(0x604FBFF9), UINT32_C(0xCD08114B) }; +static const limb_t const_divstep[12] = { + UINT32_C(0x00005000), UINT32_C(0xFFFFC800), UINT32_C(0xFFFF83FF), + UINT32_C(0xFFFFB3FF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFF7FF), + UINT32_C(0xFFFFEFFF), UINT32_C(0xFFFFEBFF), UINT32_C(0xFFFFF3FF), + UINT32_C(0x00000BFF), UINT32_C(0x00003000), UINT32_C(0x00005000) +}; + +static const limb_t const_psat[12] = { + UINT32_C(0xFFFFFFFF), UINT32_C(0x00000000), UINT32_C(0x00000000), + UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFE), UINT32_C(0xFFFFFFFF), + UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), + UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF) +}; + /* LUT for scalar multiplication by comb interleaving */ static const pt_aff_t lut_cmb[21][16] = { { @@ -18983,6 +19998,42 @@ static const pt_aff_t lut_cmb[21][16] = { }; /*- + * Finite field inversion. + * Computed with Bernstein-Yang algorithm. + * https://tches.iacr.org/index.php/TCHES/article/view/8298 + * Based on https://github.com/mit-plv/fiat-crypto/tree/master/inversion/c + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp384r1_inv(fe_t output, const fe_t t1) +{ + int i; + fe_t v1, r1, v2; + limb_t *r2 = output; + limb_t f1[LIMB_CNT + 1], g1[LIMB_CNT + 1], f2[LIMB_CNT + 1], + g2[LIMB_CNT + 1]; + limb_t d2, d1 = 1; + + fe_copy(g1, t1); + g1[LIMB_CNT] = 0; + fe_copy(f1, const_psat); + f1[LIMB_CNT] = 0; + fe_copy(r1, const_one); + fe_set_zero(v1); + + /* 1110 divstep iterations */ + for (i = 0; i < 555; i++) { + fiat_secp384r1_divstep(&d2, f2, g2, v2, r2, d1, f1, g1, v1, r1); + fiat_secp384r1_divstep(&d1, f1, g1, v1, r1, d2, f2, g2, v2, r2); + } + + fiat_secp384r1_opp(output, v1); + fiat_secp384r1_selectznz(output, f1[LIMB_CNT] >> (LIMB_BITS - 1), v1, + output); + fiat_secp384r1_mul(output, output, const_divstep); +} + +/*- * Q := 2P, both projective, Q and P same pointers OK * Autogenerated: op3/dbl_proj.op3 * https://eprint.iacr.org/2015/1060 Alg 6 @@ -19288,7 +20339,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48], int i, d, is_neg, is_inf = 1, flipped = 0; int8_t anaf[385] = { 0 }; int8_t bnaf[385] = { 0 }; - pt_prj_t Q = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); @@ -19350,6 +20401,8 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48], /*- * Variable point scalar multiplication with "regular" wnaf. + * Here "regular" means _no zeroes_, so the sequence of + * EC arithmetic ops is fixed. */ static void var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48], @@ -19357,14 +20410,14 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48], { int i, j, d, diff, is_neg; int8_t rnaf[77] = { 0 }; - pt_prj_t Q = { { 0 } }, lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); scalar_rwnaf(rnaf, scalar); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -19426,8 +20479,8 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48]) { int i, j, k, d, diff, is_neg = 0; int8_t rnaf[77] = { 0 }; - pt_prj_t Q = { { 0 } }, R = { { 0 } }; - pt_aff_t lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } }; + pt_aff_t lut = { { 0 }, { 0 } }; scalar_rwnaf(rnaf, scalar); @@ -19437,7 +20490,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48]) fe_set_zero(Q.Z); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -19491,10 +20544,11 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48]) * Everything is LE byte ordering. */ static void -point_mul_two(unsigned char outx[48], unsigned char outy[48], - const unsigned char a[48], const unsigned char b[48], - const unsigned char inx[48], - const unsigned char iny[48]) +point_mul_two_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char a[48], + const unsigned char b[48], + const unsigned char inx[48], + const unsigned char iny[48]) { pt_aff_t P; @@ -19517,8 +20571,8 @@ point_mul_two(unsigned char outx[48], unsigned char outy[48], * Everything is LE byte ordering. */ static void -point_mul_g(unsigned char outx[48], unsigned char outy[48], - const unsigned char scalar[48]) +point_mul_g_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48]) { pt_aff_t P; @@ -19537,10 +20591,10 @@ point_mul_g(unsigned char outx[48], unsigned char outy[48], * Everything is LE byte ordering. */ static void -point_mul(unsigned char outx[48], unsigned char outy[48], - const unsigned char scalar[48], - const unsigned char inx[48], - const unsigned char iny[48]) +point_mul_secp384r1(unsigned char outx[48], unsigned char outy[48], + const unsigned char scalar[48], + const unsigned char inx[48], + const unsigned char iny[48]) { pt_aff_t P; @@ -19642,8 +20696,8 @@ point_mul(unsigned char outx[48], unsigned char outy[48], } while (0) static mp_err -point_mul_g_secp384r1(const mp_int *n, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_g_secp384r1_wrap(const mp_int *n, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[48]; unsigned char b_y[48]; @@ -19658,7 +20712,7 @@ point_mul_g_secp384r1(const mp_int *n, mp_int *out_x, MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); MP_BE2LE(b_n); - point_mul_g(b_x, b_y, b_n); + point_mul_g_secp384r1(b_x, b_y, b_n); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); @@ -19669,9 +20723,9 @@ CLEANUP: } static mp_err -point_mul_secp384r1(const mp_int *n, const mp_int *in_x, - const mp_int *in_y, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_secp384r1_wrap(const mp_int *n, const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[48]; unsigned char b_y[48]; @@ -19692,7 +20746,7 @@ point_mul_secp384r1(const mp_int *n, const mp_int *in_x, MP_BE2LE(b_x); MP_BE2LE(b_y); MP_BE2LE(b_n); - point_mul(b_x, b_y, b_n, b_x, b_y); + point_mul_secp384r1(b_x, b_y, b_n, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); @@ -19703,10 +20757,11 @@ CLEANUP: } static mp_err -point_mul_two_secp384r1(const mp_int *n1, const mp_int *n2, - const mp_int *in_x, const mp_int *in_y, - mp_int *out_x, mp_int *out_y, - const ECGroup *group) +point_mul_two_secp384r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) { unsigned char b_x[48]; unsigned char b_y[48]; @@ -19716,11 +20771,11 @@ point_mul_two_secp384r1(const mp_int *n1, const mp_int *n2, /* If n2 == NULL or 0, this is just a base-point multiplication. */ if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) - return point_mul_g_secp384r1(n1, out_x, out_y, group); + return point_mul_g_secp384r1_wrap(n1, out_x, out_y, group); /* If n1 == NULL or 0, this is just an arbitary-point multiplication. */ if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) - return point_mul_secp384r1(n2, in_x, in_y, out_x, out_y, group); + return point_mul_secp384r1_wrap(n2, in_x, in_y, out_x, out_y, group); ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, MP_BADARG); @@ -19738,7 +20793,7 @@ point_mul_two_secp384r1(const mp_int *n1, const mp_int *n2, MP_BE2LE(b_y); MP_BE2LE(b_n1); MP_BE2LE(b_n2); - point_mul_two(b_x, b_y, b_n1, b_n2, b_x, b_y); + point_mul_two_secp384r1(b_x, b_y, b_n1, b_n2, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48)); @@ -19752,9 +20807,9 @@ mp_err ec_group_set_secp384r1(ECGroup *group, ECCurveName name) { if (name == ECCurve_NIST_P384) { - group->base_point_mul = &point_mul_g_secp384r1; - group->point_mul = &point_mul_secp384r1; - group->points_mul = &point_mul_two_secp384r1; + group->base_point_mul = &point_mul_g_secp384r1_wrap; + group->point_mul = &point_mul_secp384r1_wrap; + group->points_mul = &point_mul_two_secp384r1_wrap; } return MP_OKAY; } diff --git a/lib/freebl/ecl/ecp_secp521r1.c b/lib/freebl/ecl/ecp_secp521r1.c index d673eccca..57e3a81b8 100644 --- a/lib/freebl/ecl/ecp_secp521r1.c +++ b/lib/freebl/ecl/ecp_secp521r1.c @@ -1,19 +1,19 @@ /* Autogenerated: ECCKiila https://gitlab.com/nisec/ecckiila */ /*- * MIT License - * + * * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -35,13 +35,6 @@ typedef uint64_t limb_t; #define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) #define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) -#define fiat_secp521r1_carry_add(c, a, b) \ - fiat_secp521r1_add(c, a, b); \ - fiat_secp521r1_carry(c, c) -#define fiat_secp521r1_carry_sub(c, a, b) \ - fiat_secp521r1_sub(c, a, b); \ - fiat_secp521r1_carry(c, c) - /* Projective points */ typedef struct { fe_t X; @@ -90,16 +83,32 @@ typedef struct { /* tight_bounds_multiplier = 1 (from "") */ /* */ /* Computed values: */ -/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1] */ -/* eval z = z[0] + (z[1] << 58) + (z[2] << 116) + (z[3] << 174) + (z[4] << 232) + (z[5] << 0x122) + (z[6] << 0x15c) + (z[7] << 0x196) + (z[8] << 0x1d0) */ -/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */ -/* balance = [0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x3fffffffffffffe] */ +/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1] */ +/* eval z = z[0] + (z[1] << 58) + (z[2] << 116) + (z[3] << 174) + (z[4] << 232) + (z[5] << 0x122) + (z[6] << 0x15c) + (z[7] << 0x196) + (z[8] << 0x1d0) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */ +/* balance = [0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x3fffffffffffffe] */ #include <stdint.h> typedef unsigned char fiat_secp521r1_uint1; typedef signed char fiat_secp521r1_int1; -typedef signed __int128 fiat_secp521r1_int128; -typedef unsigned __int128 fiat_secp521r1_uint128; +#ifdef __GNUC__ +#define FIAT_SECP521R1_FIAT_EXTENSION __extension__ +#define FIAT_SECP521R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP521R1_FIAT_EXTENSION +#define FIAT_SECP521R1_FIAT_INLINE +#endif + +FIAT_SECP521R1_FIAT_EXTENSION typedef signed __int128 fiat_secp521r1_int128; +FIAT_SECP521R1_FIAT_EXTENSION typedef unsigned __int128 fiat_secp521r1_uint128; + +/* The type fiat_secp521r1_loose_field_element is a field element with loose bounds. */ +/* Bounds: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] */ +typedef uint64_t fiat_secp521r1_loose_field_element[9]; + +/* The type fiat_secp521r1_tight_field_element is a field element with tight bounds. */ +/* Bounds: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] */ +typedef uint64_t fiat_secp521r1_tight_field_element[9]; #if (-1 & 3) != 3 #error "This code only works on a two's complement system" @@ -120,6 +129,7 @@ fiat_secp521r1_value_barrier_u64(uint64_t a) /* * The function fiat_secp521r1_addcarryx_u58 is an addition with carry. + * * Postconditions: * out1 = (arg1 + arg2 + arg3) mod 2^58 * out2 = ⌊(arg1 + arg2 + arg3) / 2^58⌋ @@ -150,6 +160,7 @@ fiat_secp521r1_addcarryx_u58(uint64_t *out1, /* * The function fiat_secp521r1_subborrowx_u58 is a subtraction with borrow. + * * Postconditions: * out1 = (-arg1 + arg2 + -arg3) mod 2^58 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^58⌋ @@ -180,6 +191,7 @@ fiat_secp521r1_subborrowx_u58(uint64_t *out1, /* * The function fiat_secp521r1_addcarryx_u57 is an addition with carry. + * * Postconditions: * out1 = (arg1 + arg2 + arg3) mod 2^57 * out2 = ⌊(arg1 + arg2 + arg3) / 2^57⌋ @@ -210,6 +222,7 @@ fiat_secp521r1_addcarryx_u57(uint64_t *out1, /* * The function fiat_secp521r1_subborrowx_u57 is a subtraction with borrow. + * * Postconditions: * out1 = (-arg1 + arg2 + -arg3) mod 2^57 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^57⌋ @@ -240,6 +253,7 @@ fiat_secp521r1_subborrowx_u57(uint64_t *out1, /* * The function fiat_secp521r1_cmovznz_u64 is a single-word conditional move. + * * Postconditions: * out1 = (if arg1 = 0 then arg2 else arg3) * @@ -267,18 +281,16 @@ fiat_secp521r1_cmovznz_u64(uint64_t *out1, /* * The function fiat_secp521r1_carry_mul multiplies two field elements and reduces the result. + * * Postconditions: * eval out1 mod m = (eval arg1 * eval arg2) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] - * arg2: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] */ static void -fiat_secp521r1_carry_mul(uint64_t out1[9], const uint64_t arg1[9], - const uint64_t arg2[9]) +fiat_secp521r1_carry_mul( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1, + const fiat_secp521r1_loose_field_element arg2) { fiat_secp521r1_uint128 x1; fiat_secp521r1_uint128 x2; @@ -539,17 +551,15 @@ fiat_secp521r1_carry_mul(uint64_t out1[9], const uint64_t arg1[9], /* * The function fiat_secp521r1_carry_square squares a field element and reduces the result. + * * Postconditions: * eval out1 mod m = (eval arg1 * eval arg1) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] */ static void -fiat_secp521r1_carry_square(uint64_t out1[9], - const uint64_t arg1[9]) +fiat_secp521r1_carry_square( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1) { uint64_t x1; uint64_t x2; @@ -769,17 +779,17 @@ fiat_secp521r1_carry_square(uint64_t out1[9], } /* - * The function fiat_secp521r1_carry reduces a field element. + * The function fiat_secp521r1_carry_add adds two field elements. + * * Postconditions: - * eval out1 mod m = eval arg1 mod m + * eval out1 mod m = (eval arg1 + eval arg2) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] */ static void -fiat_secp521r1_carry(uint64_t out1[9], const uint64_t arg1[9]) +fiat_secp521r1_carry_add( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1, + const fiat_secp521r1_tight_field_element arg2) { uint64_t x1; uint64_t x2; @@ -801,15 +811,15 @@ fiat_secp521r1_carry(uint64_t out1[9], const uint64_t arg1[9]) uint64_t x18; uint64_t x19; uint64_t x20; - x1 = (arg1[0]); - x2 = ((x1 >> 58) + (arg1[1])); - x3 = ((x2 >> 58) + (arg1[2])); - x4 = ((x3 >> 58) + (arg1[3])); - x5 = ((x4 >> 58) + (arg1[4])); - x6 = ((x5 >> 58) + (arg1[5])); - x7 = ((x6 >> 58) + (arg1[6])); - x8 = ((x7 >> 58) + (arg1[7])); - x9 = ((x8 >> 58) + (arg1[8])); + x1 = ((arg1[0]) + (arg2[0])); + x2 = ((x1 >> 58) + ((arg1[1]) + (arg2[1]))); + x3 = ((x2 >> 58) + ((arg1[2]) + (arg2[2]))); + x4 = ((x3 >> 58) + ((arg1[3]) + (arg2[3]))); + x5 = ((x4 >> 58) + ((arg1[4]) + (arg2[4]))); + x6 = ((x5 >> 58) + ((arg1[5]) + (arg2[5]))); + x7 = ((x6 >> 58) + ((arg1[6]) + (arg2[6]))); + x8 = ((x7 >> 58) + ((arg1[7]) + (arg2[7]))); + x9 = ((x8 >> 58) + ((arg1[8]) + (arg2[8]))); x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + (x9 >> 57)); x11 = ((fiat_secp521r1_uint1)(x10 >> 58) + (x2 & UINT64_C(0x3ffffffffffffff))); @@ -835,63 +845,17 @@ fiat_secp521r1_carry(uint64_t out1[9], const uint64_t arg1[9]) } /* - * The function fiat_secp521r1_add adds two field elements. - * Postconditions: - * eval out1 mod m = (eval arg1 + eval arg2) mod m + * The function fiat_secp521r1_carry_sub subtracts two field elements. * - * Input Bounds: - * arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] - * arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] - * Output Bounds: - * out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] - */ -static void -fiat_secp521r1_add(uint64_t out1[9], const uint64_t arg1[9], - const uint64_t arg2[9]) -{ - uint64_t x1; - uint64_t x2; - uint64_t x3; - uint64_t x4; - uint64_t x5; - uint64_t x6; - uint64_t x7; - uint64_t x8; - uint64_t x9; - x1 = ((arg1[0]) + (arg2[0])); - x2 = ((arg1[1]) + (arg2[1])); - x3 = ((arg1[2]) + (arg2[2])); - x4 = ((arg1[3]) + (arg2[3])); - x5 = ((arg1[4]) + (arg2[4])); - x6 = ((arg1[5]) + (arg2[5])); - x7 = ((arg1[6]) + (arg2[6])); - x8 = ((arg1[7]) + (arg2[7])); - x9 = ((arg1[8]) + (arg2[8])); - out1[0] = x1; - out1[1] = x2; - out1[2] = x3; - out1[3] = x4; - out1[4] = x5; - out1[5] = x6; - out1[6] = x7; - out1[7] = x8; - out1[8] = x9; -} - -/* - * The function fiat_secp521r1_sub subtracts two field elements. * Postconditions: * eval out1 mod m = (eval arg1 - eval arg2) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] - * arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] - * Output Bounds: - * out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] */ static void -fiat_secp521r1_sub(uint64_t out1[9], const uint64_t arg1[9], - const uint64_t arg2[9]) +fiat_secp521r1_carry_sub( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1, + const fiat_secp521r1_tight_field_element arg2) { uint64_t x1; uint64_t x2; @@ -902,38 +866,61 @@ fiat_secp521r1_sub(uint64_t out1[9], const uint64_t arg1[9], uint64_t x7; uint64_t x8; uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; x1 = ((UINT64_C(0x7fffffffffffffe) + (arg1[0])) - (arg2[0])); - x2 = ((UINT64_C(0x7fffffffffffffe) + (arg1[1])) - (arg2[1])); - x3 = ((UINT64_C(0x7fffffffffffffe) + (arg1[2])) - (arg2[2])); - x4 = ((UINT64_C(0x7fffffffffffffe) + (arg1[3])) - (arg2[3])); - x5 = ((UINT64_C(0x7fffffffffffffe) + (arg1[4])) - (arg2[4])); - x6 = ((UINT64_C(0x7fffffffffffffe) + (arg1[5])) - (arg2[5])); - x7 = ((UINT64_C(0x7fffffffffffffe) + (arg1[6])) - (arg2[6])); - x8 = ((UINT64_C(0x7fffffffffffffe) + (arg1[7])) - (arg2[7])); - x9 = ((UINT64_C(0x3fffffffffffffe) + (arg1[8])) - (arg2[8])); - out1[0] = x1; - out1[1] = x2; - out1[2] = x3; - out1[3] = x4; - out1[4] = x5; - out1[5] = x6; - out1[6] = x7; - out1[7] = x8; - out1[8] = x9; + x2 = ((x1 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[1])) - (arg2[1]))); + x3 = ((x2 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[2])) - (arg2[2]))); + x4 = ((x3 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[3])) - (arg2[3]))); + x5 = ((x4 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[4])) - (arg2[4]))); + x6 = ((x5 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[5])) - (arg2[5]))); + x7 = ((x6 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[6])) - (arg2[6]))); + x8 = ((x7 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[7])) - (arg2[7]))); + x9 = ((x8 >> 58) + ((UINT64_C(0x3fffffffffffffe) + (arg1[8])) - (arg2[8]))); + x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + (x9 >> 57)); + x11 = ((fiat_secp521r1_uint1)(x10 >> 58) + + (x2 & UINT64_C(0x3ffffffffffffff))); + x12 = (x10 & UINT64_C(0x3ffffffffffffff)); + x13 = (x11 & UINT64_C(0x3ffffffffffffff)); + x14 = ((fiat_secp521r1_uint1)(x11 >> 58) + + (x3 & UINT64_C(0x3ffffffffffffff))); + x15 = (x4 & UINT64_C(0x3ffffffffffffff)); + x16 = (x5 & UINT64_C(0x3ffffffffffffff)); + x17 = (x6 & UINT64_C(0x3ffffffffffffff)); + x18 = (x7 & UINT64_C(0x3ffffffffffffff)); + x19 = (x8 & UINT64_C(0x3ffffffffffffff)); + x20 = (x9 & UINT64_C(0x1ffffffffffffff)); + out1[0] = x12; + out1[1] = x13; + out1[2] = x14; + out1[3] = x15; + out1[4] = x16; + out1[5] = x17; + out1[6] = x18; + out1[7] = x19; + out1[8] = x20; } /* - * The function fiat_secp521r1_opp negates a field element. + * The function fiat_secp521r1_carry_opp negates a field element. + * * Postconditions: * eval out1 mod m = -eval arg1 mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] - * Output Bounds: - * out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] */ static void -fiat_secp521r1_opp(uint64_t out1[9], const uint64_t arg1[9]) +fiat_secp521r1_carry_opp( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1) { uint64_t x1; uint64_t x2; @@ -944,28 +931,62 @@ fiat_secp521r1_opp(uint64_t out1[9], const uint64_t arg1[9]) uint64_t x7; uint64_t x8; uint64_t x9; + uint64_t x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + uint64_t x19; + uint64_t x20; x1 = (UINT64_C(0x7fffffffffffffe) - (arg1[0])); - x2 = (UINT64_C(0x7fffffffffffffe) - (arg1[1])); - x3 = (UINT64_C(0x7fffffffffffffe) - (arg1[2])); - x4 = (UINT64_C(0x7fffffffffffffe) - (arg1[3])); - x5 = (UINT64_C(0x7fffffffffffffe) - (arg1[4])); - x6 = (UINT64_C(0x7fffffffffffffe) - (arg1[5])); - x7 = (UINT64_C(0x7fffffffffffffe) - (arg1[6])); - x8 = (UINT64_C(0x7fffffffffffffe) - (arg1[7])); - x9 = (UINT64_C(0x3fffffffffffffe) - (arg1[8])); - out1[0] = x1; - out1[1] = x2; - out1[2] = x3; - out1[3] = x4; - out1[4] = x5; - out1[5] = x6; - out1[6] = x7; - out1[7] = x8; - out1[8] = x9; + x2 = ((fiat_secp521r1_uint1)(x1 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[1]))); + x3 = ((fiat_secp521r1_uint1)(x2 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[2]))); + x4 = ((fiat_secp521r1_uint1)(x3 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[3]))); + x5 = ((fiat_secp521r1_uint1)(x4 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[4]))); + x6 = ((fiat_secp521r1_uint1)(x5 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[5]))); + x7 = ((fiat_secp521r1_uint1)(x6 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[6]))); + x8 = ((fiat_secp521r1_uint1)(x7 >> 58) + + (UINT64_C(0x7fffffffffffffe) - (arg1[7]))); + x9 = ((fiat_secp521r1_uint1)(x8 >> 58) + + (UINT64_C(0x3fffffffffffffe) - (arg1[8]))); + x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + + (uint64_t)(fiat_secp521r1_uint1)(x9 >> 57)); + x11 = ((fiat_secp521r1_uint1)(x10 >> 58) + + (x2 & UINT64_C(0x3ffffffffffffff))); + x12 = (x10 & UINT64_C(0x3ffffffffffffff)); + x13 = (x11 & UINT64_C(0x3ffffffffffffff)); + x14 = ((fiat_secp521r1_uint1)(x11 >> 58) + + (x3 & UINT64_C(0x3ffffffffffffff))); + x15 = (x4 & UINT64_C(0x3ffffffffffffff)); + x16 = (x5 & UINT64_C(0x3ffffffffffffff)); + x17 = (x6 & UINT64_C(0x3ffffffffffffff)); + x18 = (x7 & UINT64_C(0x3ffffffffffffff)); + x19 = (x8 & UINT64_C(0x3ffffffffffffff)); + x20 = (x9 & UINT64_C(0x1ffffffffffffff)); + out1[0] = x12; + out1[1] = x13; + out1[2] = x14; + out1[3] = x15; + out1[4] = x16; + out1[5] = x17; + out1[6] = x18; + out1[7] = x19; + out1[8] = x20; } /* * The function fiat_secp521r1_selectznz is a multi-limb conditional select. + * * Postconditions: * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) * @@ -1013,16 +1034,16 @@ fiat_secp521r1_selectznz(uint64_t out1[9], /* * The function fiat_secp521r1_to_bytes serializes a field element to bytes in little-endian order. + * * Postconditions: * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65] * - * Input Bounds: - * arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] * Output Bounds: * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] */ static void -fiat_secp521r1_to_bytes(uint8_t out1[66], const uint64_t arg1[9]) +fiat_secp521r1_to_bytes( + uint8_t out1[66], const fiat_secp521r1_tight_field_element arg1) { uint64_t x1; fiat_secp521r1_uint1 x2; @@ -1444,16 +1465,15 @@ fiat_secp521r1_to_bytes(uint8_t out1[66], const uint64_t arg1[9]) /* * The function fiat_secp521r1_from_bytes deserializes a field element from bytes in little-endian order. + * * Postconditions: * eval out1 mod m = bytes_eval arg1 mod m * * Input Bounds: * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] - * Output Bounds: - * out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] */ static void -fiat_secp521r1_from_bytes(uint64_t out1[9], +fiat_secp521r1_from_bytes(fiat_secp521r1_tight_field_element out1, const uint8_t arg1[66]) { uint64_t x1; @@ -1751,67 +1771,7 @@ fiat_secp521r1_from_bytes(uint64_t out1[9], /* END verbatim fiat code */ -/*- - * Finite field inversion via FLT. - * NB: this is not a real Fiat function, just named that way for consistency. - * Autogenerated: ecp/secp521r1/fe_inv.op3 - * custom repunit addition chain - */ -static void -fiat_secp521r1_inv(fe_t output, const fe_t t1) -{ - int i; - /* temporary variables */ - fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8; - - fiat_secp521r1_carry_square(acc, t1); - fiat_secp521r1_carry_mul(t2, acc, t1); - fiat_secp521r1_carry_square(acc, t2); - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t4, acc, t2); - fiat_secp521r1_carry_square(acc, t4); - for (i = 0; i < 3; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t8, acc, t4); - fiat_secp521r1_carry_square(acc, t8); - for (i = 0; i < 7; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t16, acc, t8); - fiat_secp521r1_carry_square(acc, t16); - for (i = 0; i < 15; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t32, acc, t16); - fiat_secp521r1_carry_square(acc, t32); - for (i = 0; i < 31; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t64, acc, t32); - fiat_secp521r1_carry_square(acc, t64); - for (i = 0; i < 63; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t128, acc, t64); - fiat_secp521r1_carry_square(acc, t128); - for (i = 0; i < 127; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t256, acc, t128); - fiat_secp521r1_carry_square(acc, t256); - for (i = 0; i < 255; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t512, acc, t256); - fiat_secp521r1_carry_square(acc, t512); - for (i = 0; i < 3; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t516, acc, t4); - fiat_secp521r1_carry_square(acc, t516); - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t518, acc, t2); - fiat_secp521r1_carry_square(acc, t518); - fiat_secp521r1_carry_mul(t519, acc, t1); - fiat_secp521r1_carry_square(acc, t519); - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(output, acc, t1); -} - -/* curve coefficient constants */ +/* curve-related constants */ static const limb_t const_one[9] = { UINT64_C(0x0000000000000001), UINT64_C(0x0000000000000000), @@ -3940,6 +3900,67 @@ static const pt_aff_t lut_cmb[13][16] = { }; /*- + * Finite field inversion. + * Computed with exponentiation via FLT. + * Autogenerated: ecp/secp521r1/fe_inv.op3 + * custom repunit addition chain + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp521r1_inv(fe_t output, const fe_t t1) +{ + int i; + /* temporary variables */ + fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8; + + fiat_secp521r1_carry_square(acc, t1); + fiat_secp521r1_carry_mul(t2, acc, t1); + fiat_secp521r1_carry_square(acc, t2); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t4, acc, t2); + fiat_secp521r1_carry_square(acc, t4); + for (i = 0; i < 3; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t8, acc, t4); + fiat_secp521r1_carry_square(acc, t8); + for (i = 0; i < 7; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t16, acc, t8); + fiat_secp521r1_carry_square(acc, t16); + for (i = 0; i < 15; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t32, acc, t16); + fiat_secp521r1_carry_square(acc, t32); + for (i = 0; i < 31; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t64, acc, t32); + fiat_secp521r1_carry_square(acc, t64); + for (i = 0; i < 63; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t128, acc, t64); + fiat_secp521r1_carry_square(acc, t128); + for (i = 0; i < 127; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t256, acc, t128); + fiat_secp521r1_carry_square(acc, t256); + for (i = 0; i < 255; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t512, acc, t256); + fiat_secp521r1_carry_square(acc, t512); + for (i = 0; i < 3; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t516, acc, t4); + fiat_secp521r1_carry_square(acc, t516); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t518, acc, t2); + fiat_secp521r1_carry_square(acc, t518); + fiat_secp521r1_carry_mul(t519, acc, t1); + fiat_secp521r1_carry_square(acc, t519); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(output, acc, t1); +} + +/*- * Q := 2P, both projective, Q and P same pointers OK * Autogenerated: op3/dbl_proj.op3 * https://eprint.iacr.org/2015/1060 Alg 6 @@ -4000,7 +4021,7 @@ point_double(pt_prj_t *Q, const pt_prj_t *P) /*- * out1 = (arg1 == 0) ? 0 : nz * NB: this is not a "mod p equiv" 0, but literal 0 - * NB: this is not a real Fiat function, just named that way for consistency. + * NB: this is not a real fiat-crypto function, just named that way for consistency. */ static void fiat_secp521r1_nonzero(limb_t *out1, const fe_t arg1) @@ -4261,7 +4282,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], int i, d, is_neg, is_inf = 1, flipped = 0; int8_t anaf[529] = { 0 }; int8_t bnaf[529] = { 0 }; - pt_prj_t Q = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); @@ -4273,7 +4294,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], point_double(&Q, &Q); if ((d = bnaf[i])) { if ((is_neg = d < 0) != flipped) { - fiat_secp521r1_opp(Q.Y, Q.Y); + fiat_secp521r1_carry_opp(Q.Y, Q.Y); flipped ^= 1; } d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; @@ -4288,7 +4309,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], } if ((d = anaf[i])) { if ((is_neg = d < 0) != flipped) { - fiat_secp521r1_opp(Q.Y, Q.Y); + fiat_secp521r1_carry_opp(Q.Y, Q.Y); flipped ^= 1; } d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; @@ -4312,7 +4333,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], if (flipped) { /* correct sign */ - fiat_secp521r1_opp(Q.Y, Q.Y); + fiat_secp521r1_carry_opp(Q.Y, Q.Y); } /* convert to affine -- NB depends on coordinate system */ @@ -4323,6 +4344,8 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], /*- * Variable point scalar multiplication with "regular" wnaf. + * Here "regular" means _no zeroes_, so the sequence of + * EC arithmetic ops is fixed. */ static void var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], @@ -4330,14 +4353,14 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], { int i, j, d, diff, is_neg; int8_t rnaf[106] = { 0 }; - pt_prj_t Q = { { 0 } }, lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); scalar_rwnaf(rnaf, scalar); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -4367,7 +4390,7 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], fiat_secp521r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z); } /* negate lut point if digit is negative */ - fiat_secp521r1_opp(out->Y, lut.Y); + fiat_secp521r1_carry_opp(out->Y, lut.Y); fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); point_add_proj(&Q, &Q, &lut); } @@ -4378,7 +4401,7 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], /* conditionally subtract P if the scalar was even */ fe_copy(lut.X, precomp[0].X); - fiat_secp521r1_opp(lut.Y, precomp[0].Y); + fiat_secp521r1_carry_opp(lut.Y, precomp[0].Y); fe_copy(lut.Z, precomp[0].Z); point_add_proj(&lut, &lut, &Q); fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X); @@ -4399,8 +4422,8 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) { int i, j, k, d, diff, is_neg = 0; int8_t rnaf[106] = { 0 }; - pt_prj_t Q = { { 0 } }, R = { { 0 } }; - pt_aff_t lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } }; + pt_aff_t lut = { { 0 }, { 0 } }; scalar_rwnaf(rnaf, scalar); @@ -4410,7 +4433,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) fe_set_zero(Q.Z); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -4433,7 +4456,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y); } /* negate lut point if digit is negative */ - fiat_secp521r1_opp(out->Y, lut.Y); + fiat_secp521r1_carry_opp(out->Y, lut.Y); fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); point_add_mixed(&Q, &Q, &lut); } @@ -4445,7 +4468,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) /* conditionally subtract P if the scalar was even */ fe_copy(lut.X, lut_cmb[0][0].X); - fiat_secp521r1_opp(lut.Y, lut_cmb[0][0].Y); + fiat_secp521r1_carry_opp(lut.Y, lut_cmb[0][0].Y); point_add_mixed(&R, &Q, &lut); fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X); fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y); @@ -4464,10 +4487,11 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) * Everything is LE byte ordering. */ static void -point_mul_two(unsigned char outx[66], unsigned char outy[66], - const unsigned char a[66], const unsigned char b[66], - const unsigned char inx[66], - const unsigned char iny[66]) +point_mul_two_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char a[66], + const unsigned char b[66], + const unsigned char inx[66], + const unsigned char iny[66]) { pt_aff_t P; @@ -4486,8 +4510,8 @@ point_mul_two(unsigned char outx[66], unsigned char outy[66], * Everything is LE byte ordering. */ static void -point_mul_g(unsigned char outx[66], unsigned char outy[66], - const unsigned char scalar[66]) +point_mul_g_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char scalar[66]) { pt_aff_t P; @@ -4504,10 +4528,10 @@ point_mul_g(unsigned char outx[66], unsigned char outy[66], * Everything is LE byte ordering. */ static void -point_mul(unsigned char outx[66], unsigned char outy[66], - const unsigned char scalar[66], - const unsigned char inx[66], - const unsigned char iny[66]) +point_mul_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char scalar[66], + const unsigned char inx[66], + const unsigned char iny[66]) { pt_aff_t P; @@ -4632,8 +4656,8 @@ point_mul(unsigned char outx[66], unsigned char outy[66], } while (0) static mp_err -point_mul_g_secp521r1(const mp_int *n, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_g_secp521r1_wrap(const mp_int *n, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[66]; unsigned char b_y[66]; @@ -4648,7 +4672,7 @@ point_mul_g_secp521r1(const mp_int *n, mp_int *out_x, MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 66)); MP_BE2LE(b_n); - point_mul_g(b_x, b_y, b_n); + point_mul_g_secp521r1(b_x, b_y, b_n); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); @@ -4659,9 +4683,9 @@ CLEANUP: } static mp_err -point_mul_secp521r1(const mp_int *n, const mp_int *in_x, - const mp_int *in_y, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_secp521r1_wrap(const mp_int *n, const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[66]; unsigned char b_y[66]; @@ -4682,7 +4706,7 @@ point_mul_secp521r1(const mp_int *n, const mp_int *in_x, MP_BE2LE(b_x); MP_BE2LE(b_y); MP_BE2LE(b_n); - point_mul(b_x, b_y, b_n, b_x, b_y); + point_mul_secp521r1(b_x, b_y, b_n, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); @@ -4693,10 +4717,11 @@ CLEANUP: } static mp_err -point_mul_two_secp521r1(const mp_int *n1, const mp_int *n2, - const mp_int *in_x, const mp_int *in_y, - mp_int *out_x, mp_int *out_y, - const ECGroup *group) +point_mul_two_secp521r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) { unsigned char b_x[66]; unsigned char b_y[66]; @@ -4706,11 +4731,11 @@ point_mul_two_secp521r1(const mp_int *n1, const mp_int *n2, /* If n2 == NULL or 0, this is just a base-point multiplication. */ if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) - return point_mul_g_secp521r1(n1, out_x, out_y, group); + return point_mul_g_secp521r1_wrap(n1, out_x, out_y, group); /* If n1 == NULL or 0, this is just an arbitary-point multiplication. */ if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) - return point_mul_secp521r1(n2, in_x, in_y, out_x, out_y, group); + return point_mul_secp521r1_wrap(n2, in_x, in_y, out_x, out_y, group); ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, MP_BADARG); @@ -4728,7 +4753,7 @@ point_mul_two_secp521r1(const mp_int *n1, const mp_int *n2, MP_BE2LE(b_y); MP_BE2LE(b_n1); MP_BE2LE(b_n2); - point_mul_two(b_x, b_y, b_n1, b_n2, b_x, b_y); + point_mul_two_secp521r1(b_x, b_y, b_n1, b_n2, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); @@ -4742,9 +4767,9 @@ mp_err ec_group_set_secp521r1(ECGroup *group, ECCurveName name) { if (name == ECCurve_NIST_P521) { - group->base_point_mul = &point_mul_g_secp521r1; - group->point_mul = &point_mul_secp521r1; - group->points_mul = &point_mul_two_secp521r1; + group->base_point_mul = &point_mul_g_secp521r1_wrap; + group->point_mul = &point_mul_secp521r1_wrap; + group->points_mul = &point_mul_two_secp521r1_wrap; } return MP_OKAY; } @@ -4762,13 +4787,6 @@ typedef uint32_t limb_t; #define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) #define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) -#define fiat_secp521r1_carry_add(c, a, b) \ - fiat_secp521r1_add(c, a, b); \ - fiat_secp521r1_carry(c, c) -#define fiat_secp521r1_carry_sub(c, a, b) \ - fiat_secp521r1_sub(c, a, b); \ - fiat_secp521r1_carry(c, c) - /* Projective points */ typedef struct { fe_t X; @@ -4817,14 +4835,27 @@ typedef struct { /* tight_bounds_multiplier = 1 (from "") */ /* */ /* Computed values: */ -/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 0, 1] */ -/* eval z = z[0] + (z[1] << 28) + (z[2] << 55) + (z[3] << 83) + (z[4] << 110) + (z[5] << 138) + (z[6] << 165) + (z[7] << 192) + (z[8] << 220) + (z[9] << 247) + (z[10] << 0x113) + (z[11] << 0x12e) + (z[12] << 0x14a) + (z[13] << 0x165) + (z[14] << 0x180) + (z[15] << 0x19c) + (z[16] << 0x1b7) + (z[17] << 0x1d3) + (z[18] << 0x1ee) */ -/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */ -/* balance = [0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe] */ +/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 0, 1] */ +/* eval z = z[0] + (z[1] << 28) + (z[2] << 55) + (z[3] << 83) + (z[4] << 110) + (z[5] << 138) + (z[6] << 165) + (z[7] << 192) + (z[8] << 220) + (z[9] << 247) + (z[10] << 0x113) + (z[11] << 0x12e) + (z[12] << 0x14a) + (z[13] << 0x165) + (z[14] << 0x180) + (z[15] << 0x19c) + (z[16] << 0x1b7) + (z[17] << 0x1d3) + (z[18] << 0x1ee) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */ +/* balance = [0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe] */ #include <stdint.h> typedef unsigned char fiat_secp521r1_uint1; typedef signed char fiat_secp521r1_int1; +#ifdef __GNUC__ +#define FIAT_SECP521R1_FIAT_INLINE __inline__ +#else +#define FIAT_SECP521R1_FIAT_INLINE +#endif + +/* The type fiat_secp521r1_loose_field_element is a field element with loose bounds. */ +/* Bounds: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] */ +typedef uint32_t fiat_secp521r1_loose_field_element[19]; + +/* The type fiat_secp521r1_tight_field_element is a field element with tight bounds. */ +/* Bounds: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] */ +typedef uint32_t fiat_secp521r1_tight_field_element[19]; #if (-1 & 3) != 3 #error "This code only works on a two's complement system" @@ -4845,6 +4876,7 @@ fiat_secp521r1_value_barrier_u32(uint32_t a) /* * The function fiat_secp521r1_addcarryx_u28 is an addition with carry. + * * Postconditions: * out1 = (arg1 + arg2 + arg3) mod 2^28 * out2 = ⌊(arg1 + arg2 + arg3) / 2^28⌋ @@ -4875,6 +4907,7 @@ fiat_secp521r1_addcarryx_u28(uint32_t *out1, /* * The function fiat_secp521r1_subborrowx_u28 is a subtraction with borrow. + * * Postconditions: * out1 = (-arg1 + arg2 + -arg3) mod 2^28 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^28⌋ @@ -4905,6 +4938,7 @@ fiat_secp521r1_subborrowx_u28(uint32_t *out1, /* * The function fiat_secp521r1_addcarryx_u27 is an addition with carry. + * * Postconditions: * out1 = (arg1 + arg2 + arg3) mod 2^27 * out2 = ⌊(arg1 + arg2 + arg3) / 2^27⌋ @@ -4935,6 +4969,7 @@ fiat_secp521r1_addcarryx_u27(uint32_t *out1, /* * The function fiat_secp521r1_subborrowx_u27 is a subtraction with borrow. + * * Postconditions: * out1 = (-arg1 + arg2 + -arg3) mod 2^27 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^27⌋ @@ -4965,6 +5000,7 @@ fiat_secp521r1_subborrowx_u27(uint32_t *out1, /* * The function fiat_secp521r1_cmovznz_u32 is a single-word conditional move. + * * Postconditions: * out1 = (if arg1 = 0 then arg2 else arg3) * @@ -4992,18 +5028,16 @@ fiat_secp521r1_cmovznz_u32(uint32_t *out1, /* * The function fiat_secp521r1_carry_mul multiplies two field elements and reduces the result. + * * Postconditions: * eval out1 mod m = (eval arg1 * eval arg2) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] - * arg2: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] */ static void -fiat_secp521r1_carry_mul(uint32_t out1[19], const uint32_t arg1[19], - const uint32_t arg2[19]) +fiat_secp521r1_carry_mul( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1, + const fiat_secp521r1_loose_field_element arg2) { uint64_t x1; uint64_t x2; @@ -6176,17 +6210,15 @@ fiat_secp521r1_carry_mul(uint32_t out1[19], const uint32_t arg1[19], /* * The function fiat_secp521r1_carry_square squares a field element and reduces the result. + * * Postconditions: * eval out1 mod m = (eval arg1 * eval arg1) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] */ static void -fiat_secp521r1_carry_square(uint32_t out1[19], - const uint32_t arg1[19]) +fiat_secp521r1_carry_square( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_loose_field_element arg1) { uint32_t x1; uint32_t x2; @@ -6870,17 +6902,17 @@ fiat_secp521r1_carry_square(uint32_t out1[19], } /* - * The function fiat_secp521r1_carry reduces a field element. + * The function fiat_secp521r1_carry_add adds two field elements. + * * Postconditions: - * eval out1 mod m = eval arg1 mod m + * eval out1 mod m = (eval arg1 + eval arg2) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] */ static void -fiat_secp521r1_carry(uint32_t out1[19], const uint32_t arg1[19]) +fiat_secp521r1_carry_add( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1, + const fiat_secp521r1_tight_field_element arg2) { uint32_t x1; uint32_t x2; @@ -6922,25 +6954,25 @@ fiat_secp521r1_carry(uint32_t out1[19], const uint32_t arg1[19]) uint32_t x38; uint32_t x39; uint32_t x40; - x1 = (arg1[0]); - x2 = ((x1 >> 28) + (arg1[1])); - x3 = ((x2 >> 27) + (arg1[2])); - x4 = ((x3 >> 28) + (arg1[3])); - x5 = ((x4 >> 27) + (arg1[4])); - x6 = ((x5 >> 28) + (arg1[5])); - x7 = ((x6 >> 27) + (arg1[6])); - x8 = ((x7 >> 27) + (arg1[7])); - x9 = ((x8 >> 28) + (arg1[8])); - x10 = ((x9 >> 27) + (arg1[9])); - x11 = ((x10 >> 28) + (arg1[10])); - x12 = ((x11 >> 27) + (arg1[11])); - x13 = ((x12 >> 28) + (arg1[12])); - x14 = ((x13 >> 27) + (arg1[13])); - x15 = ((x14 >> 27) + (arg1[14])); - x16 = ((x15 >> 28) + (arg1[15])); - x17 = ((x16 >> 27) + (arg1[16])); - x18 = ((x17 >> 28) + (arg1[17])); - x19 = ((x18 >> 27) + (arg1[18])); + x1 = ((arg1[0]) + (arg2[0])); + x2 = ((x1 >> 28) + ((arg1[1]) + (arg2[1]))); + x3 = ((x2 >> 27) + ((arg1[2]) + (arg2[2]))); + x4 = ((x3 >> 28) + ((arg1[3]) + (arg2[3]))); + x5 = ((x4 >> 27) + ((arg1[4]) + (arg2[4]))); + x6 = ((x5 >> 28) + ((arg1[5]) + (arg2[5]))); + x7 = ((x6 >> 27) + ((arg1[6]) + (arg2[6]))); + x8 = ((x7 >> 27) + ((arg1[7]) + (arg2[7]))); + x9 = ((x8 >> 28) + ((arg1[8]) + (arg2[8]))); + x10 = ((x9 >> 27) + ((arg1[9]) + (arg2[9]))); + x11 = ((x10 >> 28) + ((arg1[10]) + (arg2[10]))); + x12 = ((x11 >> 27) + ((arg1[11]) + (arg2[11]))); + x13 = ((x12 >> 28) + ((arg1[12]) + (arg2[12]))); + x14 = ((x13 >> 27) + ((arg1[13]) + (arg2[13]))); + x15 = ((x14 >> 27) + ((arg1[14]) + (arg2[14]))); + x16 = ((x15 >> 28) + ((arg1[15]) + (arg2[15]))); + x17 = ((x16 >> 27) + ((arg1[16]) + (arg2[16]))); + x18 = ((x17 >> 28) + ((arg1[17]) + (arg2[17]))); + x19 = ((x18 >> 27) + ((arg1[18]) + (arg2[18]))); x20 = ((x1 & UINT32_C(0xfffffff)) + (x19 >> 27)); x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); x22 = (x20 & UINT32_C(0xfffffff)); @@ -6984,93 +7016,17 @@ fiat_secp521r1_carry(uint32_t out1[19], const uint32_t arg1[19]) } /* - * The function fiat_secp521r1_add adds two field elements. - * Postconditions: - * eval out1 mod m = (eval arg1 + eval arg2) mod m + * The function fiat_secp521r1_carry_sub subtracts two field elements. * - * Input Bounds: - * arg1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] - * arg2: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] - */ -static void -fiat_secp521r1_add(uint32_t out1[19], const uint32_t arg1[19], - const uint32_t arg2[19]) -{ - uint32_t x1; - uint32_t x2; - uint32_t x3; - uint32_t x4; - uint32_t x5; - uint32_t x6; - uint32_t x7; - uint32_t x8; - uint32_t x9; - uint32_t x10; - uint32_t x11; - uint32_t x12; - uint32_t x13; - uint32_t x14; - uint32_t x15; - uint32_t x16; - uint32_t x17; - uint32_t x18; - uint32_t x19; - x1 = ((arg1[0]) + (arg2[0])); - x2 = ((arg1[1]) + (arg2[1])); - x3 = ((arg1[2]) + (arg2[2])); - x4 = ((arg1[3]) + (arg2[3])); - x5 = ((arg1[4]) + (arg2[4])); - x6 = ((arg1[5]) + (arg2[5])); - x7 = ((arg1[6]) + (arg2[6])); - x8 = ((arg1[7]) + (arg2[7])); - x9 = ((arg1[8]) + (arg2[8])); - x10 = ((arg1[9]) + (arg2[9])); - x11 = ((arg1[10]) + (arg2[10])); - x12 = ((arg1[11]) + (arg2[11])); - x13 = ((arg1[12]) + (arg2[12])); - x14 = ((arg1[13]) + (arg2[13])); - x15 = ((arg1[14]) + (arg2[14])); - x16 = ((arg1[15]) + (arg2[15])); - x17 = ((arg1[16]) + (arg2[16])); - x18 = ((arg1[17]) + (arg2[17])); - x19 = ((arg1[18]) + (arg2[18])); - out1[0] = x1; - out1[1] = x2; - out1[2] = x3; - out1[3] = x4; - out1[4] = x5; - out1[5] = x6; - out1[6] = x7; - out1[7] = x8; - out1[8] = x9; - out1[9] = x10; - out1[10] = x11; - out1[11] = x12; - out1[12] = x13; - out1[13] = x14; - out1[14] = x15; - out1[15] = x16; - out1[16] = x17; - out1[17] = x18; - out1[18] = x19; -} - -/* - * The function fiat_secp521r1_sub subtracts two field elements. * Postconditions: * eval out1 mod m = (eval arg1 - eval arg2) mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] - * arg2: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] */ static void -fiat_secp521r1_sub(uint32_t out1[19], const uint32_t arg1[19], - const uint32_t arg2[19]) +fiat_secp521r1_carry_sub( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1, + const fiat_secp521r1_tight_field_element arg2) { uint32_t x1; uint32_t x2; @@ -7091,58 +7047,99 @@ fiat_secp521r1_sub(uint32_t out1[19], const uint32_t arg1[19], uint32_t x17; uint32_t x18; uint32_t x19; + uint32_t x20; + uint32_t x21; + uint32_t x22; + uint32_t x23; + uint32_t x24; + uint32_t x25; + uint32_t x26; + uint32_t x27; + uint32_t x28; + uint32_t x29; + uint32_t x30; + uint32_t x31; + uint32_t x32; + uint32_t x33; + uint32_t x34; + uint32_t x35; + uint32_t x36; + uint32_t x37; + uint32_t x38; + uint32_t x39; + uint32_t x40; x1 = ((UINT32_C(0x1ffffffe) + (arg1[0])) - (arg2[0])); - x2 = ((UINT32_C(0xffffffe) + (arg1[1])) - (arg2[1])); - x3 = ((UINT32_C(0x1ffffffe) + (arg1[2])) - (arg2[2])); - x4 = ((UINT32_C(0xffffffe) + (arg1[3])) - (arg2[3])); - x5 = ((UINT32_C(0x1ffffffe) + (arg1[4])) - (arg2[4])); - x6 = ((UINT32_C(0xffffffe) + (arg1[5])) - (arg2[5])); - x7 = ((UINT32_C(0xffffffe) + (arg1[6])) - (arg2[6])); - x8 = ((UINT32_C(0x1ffffffe) + (arg1[7])) - (arg2[7])); - x9 = ((UINT32_C(0xffffffe) + (arg1[8])) - (arg2[8])); - x10 = ((UINT32_C(0x1ffffffe) + (arg1[9])) - (arg2[9])); - x11 = ((UINT32_C(0xffffffe) + (arg1[10])) - (arg2[10])); - x12 = ((UINT32_C(0x1ffffffe) + (arg1[11])) - (arg2[11])); - x13 = ((UINT32_C(0xffffffe) + (arg1[12])) - (arg2[12])); - x14 = ((UINT32_C(0xffffffe) + (arg1[13])) - (arg2[13])); - x15 = ((UINT32_C(0x1ffffffe) + (arg1[14])) - (arg2[14])); - x16 = ((UINT32_C(0xffffffe) + (arg1[15])) - (arg2[15])); - x17 = ((UINT32_C(0x1ffffffe) + (arg1[16])) - (arg2[16])); - x18 = ((UINT32_C(0xffffffe) + (arg1[17])) - (arg2[17])); - x19 = ((UINT32_C(0xffffffe) + (arg1[18])) - (arg2[18])); - out1[0] = x1; - out1[1] = x2; - out1[2] = x3; - out1[3] = x4; - out1[4] = x5; - out1[5] = x6; - out1[6] = x7; - out1[7] = x8; - out1[8] = x9; - out1[9] = x10; - out1[10] = x11; - out1[11] = x12; - out1[12] = x13; - out1[13] = x14; - out1[14] = x15; - out1[15] = x16; - out1[16] = x17; - out1[17] = x18; - out1[18] = x19; + x2 = ((x1 >> 28) + ((UINT32_C(0xffffffe) + (arg1[1])) - (arg2[1]))); + x3 = ((x2 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[2])) - (arg2[2]))); + x4 = ((x3 >> 28) + ((UINT32_C(0xffffffe) + (arg1[3])) - (arg2[3]))); + x5 = ((x4 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[4])) - (arg2[4]))); + x6 = ((x5 >> 28) + ((UINT32_C(0xffffffe) + (arg1[5])) - (arg2[5]))); + x7 = ((x6 >> 27) + ((UINT32_C(0xffffffe) + (arg1[6])) - (arg2[6]))); + x8 = ((x7 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[7])) - (arg2[7]))); + x9 = ((x8 >> 28) + ((UINT32_C(0xffffffe) + (arg1[8])) - (arg2[8]))); + x10 = ((x9 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[9])) - (arg2[9]))); + x11 = ((x10 >> 28) + ((UINT32_C(0xffffffe) + (arg1[10])) - (arg2[10]))); + x12 = ((x11 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[11])) - (arg2[11]))); + x13 = ((x12 >> 28) + ((UINT32_C(0xffffffe) + (arg1[12])) - (arg2[12]))); + x14 = ((x13 >> 27) + ((UINT32_C(0xffffffe) + (arg1[13])) - (arg2[13]))); + x15 = ((x14 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[14])) - (arg2[14]))); + x16 = ((x15 >> 28) + ((UINT32_C(0xffffffe) + (arg1[15])) - (arg2[15]))); + x17 = ((x16 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[16])) - (arg2[16]))); + x18 = ((x17 >> 28) + ((UINT32_C(0xffffffe) + (arg1[17])) - (arg2[17]))); + x19 = ((x18 >> 27) + ((UINT32_C(0xffffffe) + (arg1[18])) - (arg2[18]))); + x20 = ((x1 & UINT32_C(0xfffffff)) + (x19 >> 27)); + x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); + x22 = (x20 & UINT32_C(0xfffffff)); + x23 = (x21 & UINT32_C(0x7ffffff)); + x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); + x25 = (x4 & UINT32_C(0x7ffffff)); + x26 = (x5 & UINT32_C(0xfffffff)); + x27 = (x6 & UINT32_C(0x7ffffff)); + x28 = (x7 & UINT32_C(0x7ffffff)); + x29 = (x8 & UINT32_C(0xfffffff)); + x30 = (x9 & UINT32_C(0x7ffffff)); + x31 = (x10 & UINT32_C(0xfffffff)); + x32 = (x11 & UINT32_C(0x7ffffff)); + x33 = (x12 & UINT32_C(0xfffffff)); + x34 = (x13 & UINT32_C(0x7ffffff)); + x35 = (x14 & UINT32_C(0x7ffffff)); + x36 = (x15 & UINT32_C(0xfffffff)); + x37 = (x16 & UINT32_C(0x7ffffff)); + x38 = (x17 & UINT32_C(0xfffffff)); + x39 = (x18 & UINT32_C(0x7ffffff)); + x40 = (x19 & UINT32_C(0x7ffffff)); + out1[0] = x22; + out1[1] = x23; + out1[2] = x24; + out1[3] = x25; + out1[4] = x26; + out1[5] = x27; + out1[6] = x28; + out1[7] = x29; + out1[8] = x30; + out1[9] = x31; + out1[10] = x32; + out1[11] = x33; + out1[12] = x34; + out1[13] = x35; + out1[14] = x36; + out1[15] = x37; + out1[16] = x38; + out1[17] = x39; + out1[18] = x40; } /* - * The function fiat_secp521r1_opp negates a field element. + * The function fiat_secp521r1_carry_opp negates a field element. + * * Postconditions: * eval out1 mod m = -eval arg1 mod m * - * Input Bounds: - * arg1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] - * Output Bounds: - * out1: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] */ static void -fiat_secp521r1_opp(uint32_t out1[19], const uint32_t arg1[19]) +fiat_secp521r1_carry_opp( + fiat_secp521r1_tight_field_element out1, + const fiat_secp521r1_tight_field_element arg1) { uint32_t x1; uint32_t x2; @@ -7163,48 +7160,105 @@ fiat_secp521r1_opp(uint32_t out1[19], const uint32_t arg1[19]) uint32_t x17; uint32_t x18; uint32_t x19; + uint32_t x20; + uint32_t x21; + uint32_t x22; + uint32_t x23; + uint32_t x24; + uint32_t x25; + uint32_t x26; + uint32_t x27; + uint32_t x28; + uint32_t x29; + uint32_t x30; + uint32_t x31; + uint32_t x32; + uint32_t x33; + uint32_t x34; + uint32_t x35; + uint32_t x36; + uint32_t x37; + uint32_t x38; + uint32_t x39; + uint32_t x40; x1 = (UINT32_C(0x1ffffffe) - (arg1[0])); - x2 = (UINT32_C(0xffffffe) - (arg1[1])); - x3 = (UINT32_C(0x1ffffffe) - (arg1[2])); - x4 = (UINT32_C(0xffffffe) - (arg1[3])); - x5 = (UINT32_C(0x1ffffffe) - (arg1[4])); - x6 = (UINT32_C(0xffffffe) - (arg1[5])); - x7 = (UINT32_C(0xffffffe) - (arg1[6])); - x8 = (UINT32_C(0x1ffffffe) - (arg1[7])); - x9 = (UINT32_C(0xffffffe) - (arg1[8])); - x10 = (UINT32_C(0x1ffffffe) - (arg1[9])); - x11 = (UINT32_C(0xffffffe) - (arg1[10])); - x12 = (UINT32_C(0x1ffffffe) - (arg1[11])); - x13 = (UINT32_C(0xffffffe) - (arg1[12])); - x14 = (UINT32_C(0xffffffe) - (arg1[13])); - x15 = (UINT32_C(0x1ffffffe) - (arg1[14])); - x16 = (UINT32_C(0xffffffe) - (arg1[15])); - x17 = (UINT32_C(0x1ffffffe) - (arg1[16])); - x18 = (UINT32_C(0xffffffe) - (arg1[17])); - x19 = (UINT32_C(0xffffffe) - (arg1[18])); - out1[0] = x1; - out1[1] = x2; - out1[2] = x3; - out1[3] = x4; - out1[4] = x5; - out1[5] = x6; - out1[6] = x7; - out1[7] = x8; - out1[8] = x9; - out1[9] = x10; - out1[10] = x11; - out1[11] = x12; - out1[12] = x13; - out1[13] = x14; - out1[14] = x15; - out1[15] = x16; - out1[16] = x17; - out1[17] = x18; - out1[18] = x19; + x2 = ((fiat_secp521r1_uint1)(x1 >> 28) + (UINT32_C(0xffffffe) - (arg1[1]))); + x3 = + ((fiat_secp521r1_uint1)(x2 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[2]))); + x4 = ((fiat_secp521r1_uint1)(x3 >> 28) + (UINT32_C(0xffffffe) - (arg1[3]))); + x5 = + ((fiat_secp521r1_uint1)(x4 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[4]))); + x6 = ((fiat_secp521r1_uint1)(x5 >> 28) + (UINT32_C(0xffffffe) - (arg1[5]))); + x7 = ((fiat_secp521r1_uint1)(x6 >> 27) + (UINT32_C(0xffffffe) - (arg1[6]))); + x8 = + ((fiat_secp521r1_uint1)(x7 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[7]))); + x9 = ((fiat_secp521r1_uint1)(x8 >> 28) + (UINT32_C(0xffffffe) - (arg1[8]))); + x10 = + ((fiat_secp521r1_uint1)(x9 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[9]))); + x11 = ((fiat_secp521r1_uint1)(x10 >> 28) + + (UINT32_C(0xffffffe) - (arg1[10]))); + x12 = ((fiat_secp521r1_uint1)(x11 >> 27) + + (UINT32_C(0x1ffffffe) - (arg1[11]))); + x13 = ((fiat_secp521r1_uint1)(x12 >> 28) + + (UINT32_C(0xffffffe) - (arg1[12]))); + x14 = ((fiat_secp521r1_uint1)(x13 >> 27) + + (UINT32_C(0xffffffe) - (arg1[13]))); + x15 = ((fiat_secp521r1_uint1)(x14 >> 27) + + (UINT32_C(0x1ffffffe) - (arg1[14]))); + x16 = ((fiat_secp521r1_uint1)(x15 >> 28) + + (UINT32_C(0xffffffe) - (arg1[15]))); + x17 = ((fiat_secp521r1_uint1)(x16 >> 27) + + (UINT32_C(0x1ffffffe) - (arg1[16]))); + x18 = ((fiat_secp521r1_uint1)(x17 >> 28) + + (UINT32_C(0xffffffe) - (arg1[17]))); + x19 = ((fiat_secp521r1_uint1)(x18 >> 27) + + (UINT32_C(0xffffffe) - (arg1[18]))); + x20 = ((x1 & UINT32_C(0xfffffff)) + + (uint32_t)(fiat_secp521r1_uint1)(x19 >> 27)); + x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); + x22 = (x20 & UINT32_C(0xfffffff)); + x23 = (x21 & UINT32_C(0x7ffffff)); + x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); + x25 = (x4 & UINT32_C(0x7ffffff)); + x26 = (x5 & UINT32_C(0xfffffff)); + x27 = (x6 & UINT32_C(0x7ffffff)); + x28 = (x7 & UINT32_C(0x7ffffff)); + x29 = (x8 & UINT32_C(0xfffffff)); + x30 = (x9 & UINT32_C(0x7ffffff)); + x31 = (x10 & UINT32_C(0xfffffff)); + x32 = (x11 & UINT32_C(0x7ffffff)); + x33 = (x12 & UINT32_C(0xfffffff)); + x34 = (x13 & UINT32_C(0x7ffffff)); + x35 = (x14 & UINT32_C(0x7ffffff)); + x36 = (x15 & UINT32_C(0xfffffff)); + x37 = (x16 & UINT32_C(0x7ffffff)); + x38 = (x17 & UINT32_C(0xfffffff)); + x39 = (x18 & UINT32_C(0x7ffffff)); + x40 = (x19 & UINT32_C(0x7ffffff)); + out1[0] = x22; + out1[1] = x23; + out1[2] = x24; + out1[3] = x25; + out1[4] = x26; + out1[5] = x27; + out1[6] = x28; + out1[7] = x29; + out1[8] = x30; + out1[9] = x31; + out1[10] = x32; + out1[11] = x33; + out1[12] = x34; + out1[13] = x35; + out1[14] = x36; + out1[15] = x37; + out1[16] = x38; + out1[17] = x39; + out1[18] = x40; } /* * The function fiat_secp521r1_selectznz is a multi-limb conditional select. + * * Postconditions: * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) * @@ -7282,16 +7336,16 @@ fiat_secp521r1_selectznz(uint32_t out1[19], /* * The function fiat_secp521r1_to_bytes serializes a field element to bytes in little-endian order. + * * Postconditions: * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65] * - * Input Bounds: - * arg1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] * Output Bounds: * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] */ static void -fiat_secp521r1_to_bytes(uint8_t out1[66], const uint32_t arg1[19]) +fiat_secp521r1_to_bytes( + uint8_t out1[66], const fiat_secp521r1_tight_field_element arg1) { uint32_t x1; fiat_secp521r1_uint1 x2; @@ -7830,16 +7884,15 @@ fiat_secp521r1_to_bytes(uint8_t out1[66], const uint32_t arg1[19]) /* * The function fiat_secp521r1_from_bytes deserializes a field element from bytes in little-endian order. + * * Postconditions: * eval out1 mod m = bytes_eval arg1 mod m * * Input Bounds: * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] - * Output Bounds: - * out1: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] */ static void -fiat_secp521r1_from_bytes(uint32_t out1[19], +fiat_secp521r1_from_bytes(fiat_secp521r1_tight_field_element out1, const uint8_t arg1[66]) { uint32_t x1; @@ -8187,67 +8240,7 @@ fiat_secp521r1_from_bytes(uint32_t out1[19], /* END verbatim fiat code */ -/*- - * Finite field inversion via FLT. - * NB: this is not a real Fiat function, just named that way for consistency. - * Autogenerated: ecp/secp521r1/fe_inv.op3 - * custom repunit addition chain - */ -static void -fiat_secp521r1_inv(fe_t output, const fe_t t1) -{ - int i; - /* temporary variables */ - fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8; - - fiat_secp521r1_carry_square(acc, t1); - fiat_secp521r1_carry_mul(t2, acc, t1); - fiat_secp521r1_carry_square(acc, t2); - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t4, acc, t2); - fiat_secp521r1_carry_square(acc, t4); - for (i = 0; i < 3; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t8, acc, t4); - fiat_secp521r1_carry_square(acc, t8); - for (i = 0; i < 7; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t16, acc, t8); - fiat_secp521r1_carry_square(acc, t16); - for (i = 0; i < 15; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t32, acc, t16); - fiat_secp521r1_carry_square(acc, t32); - for (i = 0; i < 31; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t64, acc, t32); - fiat_secp521r1_carry_square(acc, t64); - for (i = 0; i < 63; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t128, acc, t64); - fiat_secp521r1_carry_square(acc, t128); - for (i = 0; i < 127; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t256, acc, t128); - fiat_secp521r1_carry_square(acc, t256); - for (i = 0; i < 255; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t512, acc, t256); - fiat_secp521r1_carry_square(acc, t512); - for (i = 0; i < 3; i++) - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t516, acc, t4); - fiat_secp521r1_carry_square(acc, t516); - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(t518, acc, t2); - fiat_secp521r1_carry_square(acc, t518); - fiat_secp521r1_carry_mul(t519, acc, t1); - fiat_secp521r1_carry_square(acc, t519); - fiat_secp521r1_carry_square(acc, acc); - fiat_secp521r1_carry_mul(output, acc, t1); -} - -/* curve coefficient constants */ +/* curve-related constants */ static const limb_t const_one[19] = { UINT32_C(0x00000001), UINT32_C(0x00000000), UINT32_C(0x00000000), @@ -11212,6 +11205,67 @@ static const pt_aff_t lut_cmb[13][16] = { }; /*- + * Finite field inversion. + * Computed with exponentiation via FLT. + * Autogenerated: ecp/secp521r1/fe_inv.op3 + * custom repunit addition chain + * NB: this is not a real fiat-crypto function, just named that way for consistency. + */ +static void +fiat_secp521r1_inv(fe_t output, const fe_t t1) +{ + int i; + /* temporary variables */ + fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8; + + fiat_secp521r1_carry_square(acc, t1); + fiat_secp521r1_carry_mul(t2, acc, t1); + fiat_secp521r1_carry_square(acc, t2); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t4, acc, t2); + fiat_secp521r1_carry_square(acc, t4); + for (i = 0; i < 3; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t8, acc, t4); + fiat_secp521r1_carry_square(acc, t8); + for (i = 0; i < 7; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t16, acc, t8); + fiat_secp521r1_carry_square(acc, t16); + for (i = 0; i < 15; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t32, acc, t16); + fiat_secp521r1_carry_square(acc, t32); + for (i = 0; i < 31; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t64, acc, t32); + fiat_secp521r1_carry_square(acc, t64); + for (i = 0; i < 63; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t128, acc, t64); + fiat_secp521r1_carry_square(acc, t128); + for (i = 0; i < 127; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t256, acc, t128); + fiat_secp521r1_carry_square(acc, t256); + for (i = 0; i < 255; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t512, acc, t256); + fiat_secp521r1_carry_square(acc, t512); + for (i = 0; i < 3; i++) + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t516, acc, t4); + fiat_secp521r1_carry_square(acc, t516); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(t518, acc, t2); + fiat_secp521r1_carry_square(acc, t518); + fiat_secp521r1_carry_mul(t519, acc, t1); + fiat_secp521r1_carry_square(acc, t519); + fiat_secp521r1_carry_square(acc, acc); + fiat_secp521r1_carry_mul(output, acc, t1); +} + +/*- * Q := 2P, both projective, Q and P same pointers OK * Autogenerated: op3/dbl_proj.op3 * https://eprint.iacr.org/2015/1060 Alg 6 @@ -11272,7 +11326,7 @@ point_double(pt_prj_t *Q, const pt_prj_t *P) /*- * out1 = (arg1 == 0) ? 0 : nz * NB: this is not a "mod p equiv" 0, but literal 0 - * NB: this is not a real Fiat function, just named that way for consistency. + * NB: this is not a real fiat-crypto function, just named that way for consistency. */ static void fiat_secp521r1_nonzero(limb_t *out1, const fe_t arg1) @@ -11533,7 +11587,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], int i, d, is_neg, is_inf = 1, flipped = 0; int8_t anaf[529] = { 0 }; int8_t bnaf[529] = { 0 }; - pt_prj_t Q = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); @@ -11545,7 +11599,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], point_double(&Q, &Q); if ((d = bnaf[i])) { if ((is_neg = d < 0) != flipped) { - fiat_secp521r1_opp(Q.Y, Q.Y); + fiat_secp521r1_carry_opp(Q.Y, Q.Y); flipped ^= 1; } d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; @@ -11560,7 +11614,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], } if ((d = anaf[i])) { if ((is_neg = d < 0) != flipped) { - fiat_secp521r1_opp(Q.Y, Q.Y); + fiat_secp521r1_carry_opp(Q.Y, Q.Y); flipped ^= 1; } d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; @@ -11584,7 +11638,7 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], if (flipped) { /* correct sign */ - fiat_secp521r1_opp(Q.Y, Q.Y); + fiat_secp521r1_carry_opp(Q.Y, Q.Y); } /* convert to affine -- NB depends on coordinate system */ @@ -11595,6 +11649,8 @@ var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66], /*- * Variable point scalar multiplication with "regular" wnaf. + * Here "regular" means _no zeroes_, so the sequence of + * EC arithmetic ops is fixed. */ static void var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], @@ -11602,14 +11658,14 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], { int i, j, d, diff, is_neg; int8_t rnaf[106] = { 0 }; - pt_prj_t Q = { { 0 } }, lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } }; pt_prj_t precomp[DRADIX / 2]; precomp_wnaf(precomp, P); scalar_rwnaf(rnaf, scalar); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -11639,7 +11695,7 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], fiat_secp521r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z); } /* negate lut point if digit is negative */ - fiat_secp521r1_opp(out->Y, lut.Y); + fiat_secp521r1_carry_opp(out->Y, lut.Y); fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); point_add_proj(&Q, &Q, &lut); } @@ -11650,7 +11706,7 @@ var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66], /* conditionally subtract P if the scalar was even */ fe_copy(lut.X, precomp[0].X); - fiat_secp521r1_opp(lut.Y, precomp[0].Y); + fiat_secp521r1_carry_opp(lut.Y, precomp[0].Y); fe_copy(lut.Z, precomp[0].Z); point_add_proj(&lut, &lut, &Q); fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X); @@ -11671,8 +11727,8 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) { int i, j, k, d, diff, is_neg = 0; int8_t rnaf[106] = { 0 }; - pt_prj_t Q = { { 0 } }, R = { { 0 } }; - pt_aff_t lut = { { 0 } }; + pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } }; + pt_aff_t lut = { { 0 }, { 0 } }; scalar_rwnaf(rnaf, scalar); @@ -11682,7 +11738,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) fe_set_zero(Q.Z); #if defined(_MSC_VER) -/* result still unsigned: yes we know */ + /* result still unsigned: yes we know */ #pragma warning(push) #pragma warning(disable : 4146) #endif @@ -11705,7 +11761,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y); } /* negate lut point if digit is negative */ - fiat_secp521r1_opp(out->Y, lut.Y); + fiat_secp521r1_carry_opp(out->Y, lut.Y); fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); point_add_mixed(&Q, &Q, &lut); } @@ -11717,7 +11773,7 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) /* conditionally subtract P if the scalar was even */ fe_copy(lut.X, lut_cmb[0][0].X); - fiat_secp521r1_opp(lut.Y, lut_cmb[0][0].Y); + fiat_secp521r1_carry_opp(lut.Y, lut_cmb[0][0].Y); point_add_mixed(&R, &Q, &lut); fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X); fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y); @@ -11736,10 +11792,11 @@ fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66]) * Everything is LE byte ordering. */ static void -point_mul_two(unsigned char outx[66], unsigned char outy[66], - const unsigned char a[66], const unsigned char b[66], - const unsigned char inx[66], - const unsigned char iny[66]) +point_mul_two_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char a[66], + const unsigned char b[66], + const unsigned char inx[66], + const unsigned char iny[66]) { pt_aff_t P; @@ -11758,8 +11815,8 @@ point_mul_two(unsigned char outx[66], unsigned char outy[66], * Everything is LE byte ordering. */ static void -point_mul_g(unsigned char outx[66], unsigned char outy[66], - const unsigned char scalar[66]) +point_mul_g_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char scalar[66]) { pt_aff_t P; @@ -11776,10 +11833,10 @@ point_mul_g(unsigned char outx[66], unsigned char outy[66], * Everything is LE byte ordering. */ static void -point_mul(unsigned char outx[66], unsigned char outy[66], - const unsigned char scalar[66], - const unsigned char inx[66], - const unsigned char iny[66]) +point_mul_secp521r1(unsigned char outx[66], unsigned char outy[66], + const unsigned char scalar[66], + const unsigned char inx[66], + const unsigned char iny[66]) { pt_aff_t P; @@ -11904,8 +11961,8 @@ point_mul(unsigned char outx[66], unsigned char outy[66], } while (0) static mp_err -point_mul_g_secp521r1(const mp_int *n, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_g_secp521r1_wrap(const mp_int *n, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[66]; unsigned char b_y[66]; @@ -11920,7 +11977,7 @@ point_mul_g_secp521r1(const mp_int *n, mp_int *out_x, MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 66)); MP_BE2LE(b_n); - point_mul_g(b_x, b_y, b_n); + point_mul_g_secp521r1(b_x, b_y, b_n); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); @@ -11931,9 +11988,9 @@ CLEANUP: } static mp_err -point_mul_secp521r1(const mp_int *n, const mp_int *in_x, - const mp_int *in_y, mp_int *out_x, - mp_int *out_y, const ECGroup *group) +point_mul_secp521r1_wrap(const mp_int *n, const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, const ECGroup *group) { unsigned char b_x[66]; unsigned char b_y[66]; @@ -11954,7 +12011,7 @@ point_mul_secp521r1(const mp_int *n, const mp_int *in_x, MP_BE2LE(b_x); MP_BE2LE(b_y); MP_BE2LE(b_n); - point_mul(b_x, b_y, b_n, b_x, b_y); + point_mul_secp521r1(b_x, b_y, b_n, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); @@ -11965,10 +12022,11 @@ CLEANUP: } static mp_err -point_mul_two_secp521r1(const mp_int *n1, const mp_int *n2, - const mp_int *in_x, const mp_int *in_y, - mp_int *out_x, mp_int *out_y, - const ECGroup *group) +point_mul_two_secp521r1_wrap(const mp_int *n1, const mp_int *n2, + const mp_int *in_x, + const mp_int *in_y, mp_int *out_x, + mp_int *out_y, + const ECGroup *group) { unsigned char b_x[66]; unsigned char b_y[66]; @@ -11978,11 +12036,11 @@ point_mul_two_secp521r1(const mp_int *n1, const mp_int *n2, /* If n2 == NULL or 0, this is just a base-point multiplication. */ if (n2 == NULL || mp_cmp_z(n2) == MP_EQ) - return point_mul_g_secp521r1(n1, out_x, out_y, group); + return point_mul_g_secp521r1_wrap(n1, out_x, out_y, group); /* If n1 == NULL or 0, this is just an arbitary-point multiplication. */ if (n1 == NULL || mp_cmp_z(n1) == MP_EQ) - return point_mul_secp521r1(n2, in_x, in_y, out_x, out_y, group); + return point_mul_secp521r1_wrap(n2, in_x, in_y, out_x, out_y, group); ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL, MP_BADARG); @@ -12000,7 +12058,7 @@ point_mul_two_secp521r1(const mp_int *n1, const mp_int *n2, MP_BE2LE(b_y); MP_BE2LE(b_n1); MP_BE2LE(b_n2); - point_mul_two(b_x, b_y, b_n1, b_n2, b_x, b_y); + point_mul_two_secp521r1(b_x, b_y, b_n1, b_n2, b_x, b_y); MP_BE2LE(b_x); MP_BE2LE(b_y); MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66)); @@ -12014,9 +12072,9 @@ mp_err ec_group_set_secp521r1(ECGroup *group, ECCurveName name) { if (name == ECCurve_NIST_P521) { - group->base_point_mul = &point_mul_g_secp521r1; - group->point_mul = &point_mul_secp521r1; - group->points_mul = &point_mul_two_secp521r1; + group->base_point_mul = &point_mul_g_secp521r1_wrap; + group->point_mul = &point_mul_secp521r1_wrap; + group->points_mul = &point_mul_two_secp521r1_wrap; } return MP_OKAY; } |