diff options
Diffstat (limited to 'security/nss/lib/freebl/ecl/ecp_192.c')
-rw-r--r-- | security/nss/lib/freebl/ecl/ecp_192.c | 469 |
1 files changed, 378 insertions, 91 deletions
diff --git a/security/nss/lib/freebl/ecl/ecp_192.c b/security/nss/lib/freebl/ecl/ecp_192.c index 26867ae3e..f4cd42bc3 100644 --- a/security/nss/lib/freebl/ecl/ecp_192.c +++ b/security/nss/lib/freebl/ecl/ecp_192.c @@ -42,6 +42,8 @@ #include "mpi-priv.h" #include <stdlib.h> +#define ECP192_DIGITS ECL_CURVE_DIGITS(192) + /* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software * Implementation of the NIST Elliptic Curves over Prime Fields. */ @@ -50,101 +52,127 @@ ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth) { mp_err res = MP_OKAY; mp_size a_used = MP_USED(a); - - /* s is a statically-allocated mp_int of exactly the size we need */ - mp_int s; - + mp_digit r3; +#ifndef MPI_AMD64_ADD + mp_digit carry; +#endif #ifdef ECL_THIRTY_TWO_BIT - mp_digit sa[6]; - mp_digit a11 = 0, a10, a9 = 0, a8, a7 = 0, a6; - - MP_SIGN(&s) = MP_ZPOS; - MP_ALLOC(&s) = 6; - MP_USED(&s) = 6; - MP_DIGITS(&s) = sa; + mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0; + mp_digit r0a, r0b, r1a, r1b, r2a, r2b; #else - mp_digit sa[3]; mp_digit a5 = 0, a4 = 0, a3 = 0; - - MP_SIGN(&s) = MP_ZPOS; - MP_ALLOC(&s) = 3; - MP_USED(&s) = 3; - MP_DIGITS(&s) = sa; + mp_digit r0, r1, r2; #endif /* reduction not needed if a is not larger than field size */ -#ifdef ECL_THIRTY_TWO_BIT - if (a_used < 6) { -#else - if (a_used < 3) { -#endif + if (a_used < ECP192_DIGITS) { + if (a == r) { + return MP_OKAY; + } return mp_copy(a, r); } -#ifdef ECL_THIRTY_TWO_BIT + /* for polynomials larger than twice the field size, use regular * reduction */ - if (a_used > 12) { + if (a_used > ECP192_DIGITS*2) { MP_CHECKOK(mp_mod(a, &meth->irr, r)); } else { /* copy out upper words of a */ + +#ifdef ECL_THIRTY_TWO_BIT + + /* in all the math below, + * nXb is most signifiant, nXa is least significant */ switch (a_used) { case 12: - a11 = MP_DIGIT(a, 11); + a5b = MP_DIGIT(a, 11); case 11: - a10 = MP_DIGIT(a, 10); + a5a = MP_DIGIT(a, 10); case 10: - a9 = MP_DIGIT(a, 9); + a4b = MP_DIGIT(a, 9); case 9: - a8 = MP_DIGIT(a, 8); + a4a = MP_DIGIT(a, 8); case 8: - a7 = MP_DIGIT(a, 7); + a3b = MP_DIGIT(a, 7); case 7: - a6 = MP_DIGIT(a, 6); + a3a = MP_DIGIT(a, 6); } + + + r2b= MP_DIGIT(a, 5); + r2a= MP_DIGIT(a, 4); + r1b = MP_DIGIT(a, 3); + r1a = MP_DIGIT(a, 2); + r0b = MP_DIGIT(a, 1); + r0a = MP_DIGIT(a, 0); + + /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ + MP_ADD_CARRY(r0a, a3a, r0a, 0, carry); + MP_ADD_CARRY(r0b, a3b, r0b, carry, carry); + MP_ADD_CARRY(r1a, a3a, r1a, carry, carry); + MP_ADD_CARRY(r1b, a3b, r1b, carry, carry); + MP_ADD_CARRY(r2a, a4a, r2a, carry, carry); + MP_ADD_CARRY(r2b, a4b, r2b, carry, carry); + r3 = carry; carry = 0; + MP_ADD_CARRY(r0a, a5a, r0a, 0, carry); + MP_ADD_CARRY(r0b, a5b, r0b, carry, carry); + MP_ADD_CARRY(r1a, a5a, r1a, carry, carry); + MP_ADD_CARRY(r1b, a5b, r1b, carry, carry); + MP_ADD_CARRY(r2a, a5a, r2a, carry, carry); + MP_ADD_CARRY(r2b, a5b, r2b, carry, carry); + r3 += carry; + MP_ADD_CARRY(r1a, a4a, r1a, 0, carry); + MP_ADD_CARRY(r1b, a4b, r1b, carry, carry); + MP_ADD_CARRY(r2a, 0, r2a, carry, carry); + MP_ADD_CARRY(r2b, 0, r2b, carry, carry); + r3 += carry; + + /* reduce out the carry */ + while (r3) { + MP_ADD_CARRY(r0a, r3, r0a, 0, carry); + MP_ADD_CARRY(r0b, 0, r0b, carry, carry); + MP_ADD_CARRY(r1a, r3, r1a, carry, carry); + MP_ADD_CARRY(r1b, 0, r1b, carry, carry); + MP_ADD_CARRY(r2a, 0, r2a, carry, carry); + MP_ADD_CARRY(r2b, 0, r2b, carry, carry); + r3 = carry; + } + + /* check for final reduction */ + /* + * our field is 0xffffffffffffffff, 0xfffffffffffffffe, + * 0xffffffffffffffff. That means we can only be over and need + * one more reduction + * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) + * and + * r1 == 0xffffffffffffffffff or + * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff + * In all cases, we subtract the field (or add the 2's + * complement value (1,1,0)). (r0, r1, r2) + */ + if (((r2b == 0xffffffff) && (r2a == 0xffffffff) + && (r1b == 0xffffffff) ) && + ((r1a == 0xffffffff) || + (r1a == 0xfffffffe) && (r0a == 0xffffffff) && + (r0b == 0xffffffff)) ) { + /* do a quick subtract */ + MP_ADD_CARRY(r0a, 1, r0a, 0, carry); + r0b += carry; + r1a = r1b = r2a = r2b = 0; + } + /* set the lower words of r */ if (a != r) { - MP_CHECKOK(s_mp_pad(r, 7)); - MP_DIGIT(r, 5) = MP_DIGIT(a, 5); - MP_DIGIT(r, 4) = MP_DIGIT(a, 4); - MP_DIGIT(r, 3) = MP_DIGIT(a, 3); - MP_DIGIT(r, 2) = MP_DIGIT(a, 2); - MP_DIGIT(r, 1) = MP_DIGIT(a, 1); - MP_DIGIT(r, 0) = MP_DIGIT(a, 0); + MP_CHECKOK(s_mp_pad(r, 6)); } + MP_DIGIT(r, 5) = r2b; + MP_DIGIT(r, 4) = r2a; + MP_DIGIT(r, 3) = r1b; + MP_DIGIT(r, 2) = r1a; + MP_DIGIT(r, 1) = r0b; + MP_DIGIT(r, 0) = r0a; MP_USED(r) = 6; - /* compute r = s1 + s2 + s3 + s4, where s1 = (a2,a1,a0), s2 = - * (0,a3,a3), s3 = (a4,a4,0), and s4 = (a5,a5,a5), for - * sixty-four-bit words */ - switch (a_used) { - case 12: - case 11: - sa[5] = sa[3] = sa[1] = a11; - sa[4] = sa[2] = sa[0] = a10; - MP_CHECKOK(mp_add(r, &s, r)); - case 10: - case 9: - sa[5] = sa[3] = a9; - sa[4] = sa[2] = a8; - sa[1] = sa[0] = 0; - MP_CHECKOK(mp_add(r, &s, r)); - case 8: - case 7: - sa[5] = sa[4] = 0; - sa[3] = sa[1] = a7; - sa[2] = sa[0] = a6; - MP_CHECKOK(mp_add(r, &s, r)); - } - /* there might be 1 or 2 bits left to reduce; use regular - * reduction for this */ - MP_CHECKOK(mp_mod(r, &meth->irr, r)); - } #else - /* for polynomials larger than twice the field size, use regular - * reduction */ - if (a_used > 6) { - MP_CHECKOK(mp_mod(a, &meth->irr, r)); - } else { - /* copy out upper words of a */ switch (a_used) { case 6: a5 = MP_DIGIT(a, 5); @@ -153,39 +181,268 @@ ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth) case 4: a3 = MP_DIGIT(a, 3); } + + r2 = MP_DIGIT(a, 2); + r1 = MP_DIGIT(a, 1); + r0 = MP_DIGIT(a, 0); + + /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ +#ifndef MPI_AMD64_ADD + MP_ADD_CARRY(r0, a3, r0, 0, carry); + MP_ADD_CARRY(r1, a3, r1, carry, carry); + MP_ADD_CARRY(r2, a4, r2, carry, carry); + r3 = carry; + MP_ADD_CARRY(r0, a5, r0, 0, carry); + MP_ADD_CARRY(r1, a5, r1, carry, carry); + MP_ADD_CARRY(r2, a5, r2, carry, carry); + r3 += carry; + MP_ADD_CARRY(r1, a4, r1, 0, carry); + MP_ADD_CARRY(r2, 0, r2, carry, carry); + r3 += carry; + +#else + r2 = MP_DIGIT(a, 2); + r1 = MP_DIGIT(a, 1); + r0 = MP_DIGIT(a, 0); + + /* set the lower words of r */ + __asm__ ( + "xorq %3,%3 \n\t" + "addq %4,%0 \n\t" + "adcq %4,%1 \n\t" + "adcq %5,%2 \n\t" + "adcq $0,%3 \n\t" + "addq %6,%0 \n\t" + "adcq %6,%1 \n\t" + "adcq %6,%2 \n\t" + "adcq $0,%3 \n\t" + "addq %5,%1 \n\t" + "adcq $0,%2 \n\t" + "adcq $0,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3), + "=r"(a4), "=r"(a5) + : "0" (r0), "1" (r1), "2" (r2), "3" (r3), + "4" (a3), "5" (a4), "6"(a5) + : "%cc" ); +#endif + + /* reduce out the carry */ + while (r3) { +#ifndef MPI_AMD64_ADD + MP_ADD_CARRY(r0, r3, r0, 0, carry); + MP_ADD_CARRY(r1, r3, r1, carry, carry); + MP_ADD_CARRY(r2, 0, r2, carry, carry); + r3 = carry; +#else + a3=r3; + __asm__ ( + "xorq %3,%3 \n\t" + "addq %4,%0 \n\t" + "adcq %4,%1 \n\t" + "adcq $0,%2 \n\t" + "adcq $0,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3) + : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3) + : "%cc" ); +#endif + } + + /* check for final reduction */ + /* + * our field is 0xffffffffffffffff, 0xfffffffffffffffe, + * 0xffffffffffffffff. That means we can only be over and need + * one more reduction + * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) + * and + * r1 == 0xffffffffffffffffff or + * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff + * In all cases, we subtract the field (or add the 2's + * complement value (1,1,0)). (r0, r1, r2) + */ + if (r3 || ((r2 == MP_DIGIT_MAX) && + ((r1 == MP_DIGIT_MAX) || + ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { + /* do a quick subtract */ + r0++; + r1 = r2 = 0; + } /* set the lower words of r */ if (a != r) { - MP_CHECKOK(s_mp_pad(r, 4)); - MP_DIGIT(r, 2) = MP_DIGIT(a, 2); - MP_DIGIT(r, 1) = MP_DIGIT(a, 1); - MP_DIGIT(r, 0) = MP_DIGIT(a, 0); + MP_CHECKOK(s_mp_pad(r, 3)); } + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; MP_USED(r) = 3; - /* compute r = s1 + s2 + s3 + s4, where s1 = (a2,a1,a0), s2 = - * (0,a3,a3), s3 = (a4,a4,0), and s4 = (a5,a5,a5) */ - switch (a_used) { - case 6: - sa[2] = sa[1] = sa[0] = a5; - MP_CHECKOK(mp_add(r, &s, r)); - case 5: - sa[2] = sa[1] = a4; - sa[0] = 0; - MP_CHECKOK(mp_add(r, &s, r)); - case 4: - sa[2] = 0; - sa[1] = sa[0] = a3; - MP_CHECKOK(mp_add(r, &s, r)); - } - /* there might be 1 or 2 bits left to reduce; use regular - * reduction for this */ - MP_CHECKOK(mp_mod(r, &meth->irr, r)); +#endif } + + CLEANUP: + return res; +} + +#ifndef ECL_THIRTY_TWO_BIT +/* Compute the sum of 192 bit curves. Do the work in-line since the + * number of words are so small, we don't want to overhead of mp function + * calls. Uses optimized modular reduction for p192. + */ +mp_err +ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit a0 = 0, a1 = 0, a2 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0; + mp_digit carry; + + switch(MP_USED(a)) { + case 3: + a2 = MP_DIGIT(a,2); + case 2: + a1 = MP_DIGIT(a,1); + case 1: + a0 = MP_DIGIT(a,0); + } + switch(MP_USED(b)) { + case 3: + r2 = MP_DIGIT(b,2); + case 2: + r1 = MP_DIGIT(b,1); + case 1: + r0 = MP_DIGIT(b,0); + } + +#ifndef MPI_AMD64_ADD + MP_ADD_CARRY(a0, r0, r0, 0, carry); + MP_ADD_CARRY(a1, r1, r1, carry, carry); + MP_ADD_CARRY(a2, r2, r2, carry, carry); +#else + __asm__ ( + "xorq %3,%3 \n\t" + "addq %4,%0 \n\t" + "adcq %5,%1 \n\t" + "adcq %6,%2 \n\t" + "adcq $0,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry) + : "r" (a0), "r" (a1), "r" (a2), "0" (r0), + "1" (r1), "2" (r2) + : "%cc" ); +#endif + + /* Do quick 'subract' if we've gone over + * (add the 2's complement of the curve field) */ + if (carry || ((r2 == MP_DIGIT_MAX) && + ((r1 == MP_DIGIT_MAX) || + ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { +#ifndef MPI_AMD64_ADD + MP_ADD_CARRY(r0, 1, r0, 0, carry); + MP_ADD_CARRY(r1, 1, r1, carry, carry); + MP_ADD_CARRY(r2, 0, r2, carry, carry); +#else + __asm__ ( + "addq $1,%0 \n\t" + "adcq $1,%1 \n\t" + "adcq $0,%2 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2) + : "0" (r0), "1" (r1), "2" (r2) + : "%cc" ); +#endif + } + + + MP_CHECKOK(s_mp_pad(r, 3)); + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 3; + s_mp_clamp(r); + + + CLEANUP: + return res; +} + +/* Compute the diff of 192 bit curves. Do the work in-line since the + * number of words are so small, we don't want to overhead of mp function + * calls. Uses optimized modular reduction for p192. + */ +mp_err +ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit b0 = 0, b1 = 0, b2 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0; + mp_digit borrow; + + switch(MP_USED(a)) { + case 3: + r2 = MP_DIGIT(a,2); + case 2: + r1 = MP_DIGIT(a,1); + case 1: + r0 = MP_DIGIT(a,0); + } + + switch(MP_USED(b)) { + case 3: + b2 = MP_DIGIT(b,2); + case 2: + b1 = MP_DIGIT(b,1); + case 1: + b0 = MP_DIGIT(b,0); + } + +#ifndef MPI_AMD64_ADD + MP_SUB_BORROW(r0, b0, r0, 0, borrow); + MP_SUB_BORROW(r1, b1, r1, borrow, borrow); + MP_SUB_BORROW(r2, b2, r2, borrow, borrow); +#else + __asm__ ( + "xorq %3,%3 \n\t" + "subq %4,%0 \n\t" + "sbbq %5,%1 \n\t" + "sbbq %6,%2 \n\t" + "adcq $0,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow) + : "r" (b0), "r" (b1), "r" (b2), "0" (r0), + "1" (r1), "2" (r2) + : "%cc" ); +#endif + + /* Do quick 'add' if we've gone under 0 + * (subtract the 2's complement of the curve field) */ + if (borrow) { +#ifndef MPI_AMD64_ADD + MP_SUB_BORROW(r0, 1, r0, 0, borrow); + MP_SUB_BORROW(r1, 1, r1, borrow, borrow); + MP_SUB_BORROW(r2, 0, r2, borrow, borrow); +#else + __asm__ ( + "subq $1,%0 \n\t" + "sbbq $1,%1 \n\t" + "sbbq $0,%2 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2) + : "0" (r0), "1" (r1), "2" (r2) + : "%cc" ); #endif + } + + MP_CHECKOK(s_mp_pad(r, 3)); + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 3; + s_mp_clamp(r); CLEANUP: return res; } +#endif + /* Compute the square of polynomial a, reduce modulo p192. Store the * result in r. r could be a. Uses optimized modular reduction for p192. */ @@ -215,6 +472,31 @@ ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r, return res; } +/* Divides two field elements. If a is NULL, then returns the inverse of + * b. */ +mp_err +ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + return mp_invmod(b, &meth->irr, r); + } else { + /* MPI doesn't support divmod, so we implement it using invmod and + * mulmod. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); + MP_CHECKOK(mp_mul(a, &t, r)); + MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); + CLEANUP: + mp_clear(&t); + return res; + } +} + /* Wire in fast field arithmetic and precomputation of base point for * named curves. */ mp_err @@ -224,6 +506,11 @@ ec_group_set_gfp192(ECGroup *group, ECCurveName name) group->meth->field_mod = &ec_GFp_nistp192_mod; group->meth->field_mul = &ec_GFp_nistp192_mul; group->meth->field_sqr = &ec_GFp_nistp192_sqr; + group->meth->field_div = &ec_GFp_nistp192_div; +#ifndef ECL_THIRTY_TWO_BIT + group->meth->field_add = &ec_GFp_nistp192_add; + group->meth->field_sub = &ec_GFp_nistp192_sub; +#endif } return MP_OKAY; } |