summaryrefslogtreecommitdiff
path: root/security/nss/lib/freebl/ecl/ecp_192.c
diff options
context:
space:
mode:
Diffstat (limited to 'security/nss/lib/freebl/ecl/ecp_192.c')
-rw-r--r--security/nss/lib/freebl/ecl/ecp_192.c469
1 files changed, 378 insertions, 91 deletions
diff --git a/security/nss/lib/freebl/ecl/ecp_192.c b/security/nss/lib/freebl/ecl/ecp_192.c
index 26867ae3e..f4cd42bc3 100644
--- a/security/nss/lib/freebl/ecl/ecp_192.c
+++ b/security/nss/lib/freebl/ecl/ecp_192.c
@@ -42,6 +42,8 @@
#include "mpi-priv.h"
#include <stdlib.h>
+#define ECP192_DIGITS ECL_CURVE_DIGITS(192)
+
/* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses
* algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
* Implementation of the NIST Elliptic Curves over Prime Fields. */
@@ -50,101 +52,127 @@ ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
mp_err res = MP_OKAY;
mp_size a_used = MP_USED(a);
-
- /* s is a statically-allocated mp_int of exactly the size we need */
- mp_int s;
-
+ mp_digit r3;
+#ifndef MPI_AMD64_ADD
+ mp_digit carry;
+#endif
#ifdef ECL_THIRTY_TWO_BIT
- mp_digit sa[6];
- mp_digit a11 = 0, a10, a9 = 0, a8, a7 = 0, a6;
-
- MP_SIGN(&s) = MP_ZPOS;
- MP_ALLOC(&s) = 6;
- MP_USED(&s) = 6;
- MP_DIGITS(&s) = sa;
+ mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
+ mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
#else
- mp_digit sa[3];
mp_digit a5 = 0, a4 = 0, a3 = 0;
-
- MP_SIGN(&s) = MP_ZPOS;
- MP_ALLOC(&s) = 3;
- MP_USED(&s) = 3;
- MP_DIGITS(&s) = sa;
+ mp_digit r0, r1, r2;
#endif
/* reduction not needed if a is not larger than field size */
-#ifdef ECL_THIRTY_TWO_BIT
- if (a_used < 6) {
-#else
- if (a_used < 3) {
-#endif
+ if (a_used < ECP192_DIGITS) {
+ if (a == r) {
+ return MP_OKAY;
+ }
return mp_copy(a, r);
}
-#ifdef ECL_THIRTY_TWO_BIT
+
/* for polynomials larger than twice the field size, use regular
* reduction */
- if (a_used > 12) {
+ if (a_used > ECP192_DIGITS*2) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
/* copy out upper words of a */
+
+#ifdef ECL_THIRTY_TWO_BIT
+
+ /* in all the math below,
+ * nXb is most significant, nXa is least significant */
switch (a_used) {
case 12:
- a11 = MP_DIGIT(a, 11);
+ a5b = MP_DIGIT(a, 11);
case 11:
- a10 = MP_DIGIT(a, 10);
+ a5a = MP_DIGIT(a, 10);
case 10:
- a9 = MP_DIGIT(a, 9);
+ a4b = MP_DIGIT(a, 9);
case 9:
- a8 = MP_DIGIT(a, 8);
+ a4a = MP_DIGIT(a, 8);
case 8:
- a7 = MP_DIGIT(a, 7);
+ a3b = MP_DIGIT(a, 7);
case 7:
- a6 = MP_DIGIT(a, 6);
+ a3a = MP_DIGIT(a, 6);
}
+
+
+ r2b= MP_DIGIT(a, 5);
+ r2a= MP_DIGIT(a, 4);
+ r1b = MP_DIGIT(a, 3);
+ r1a = MP_DIGIT(a, 2);
+ r0b = MP_DIGIT(a, 1);
+ r0a = MP_DIGIT(a, 0);
+
+ /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
+ MP_ADD_CARRY(r0a, a3a, r0a, 0, carry);
+ MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
+ MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
+ MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
+ MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
+ MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
+ r3 = carry; carry = 0;
+ MP_ADD_CARRY(r0a, a5a, r0a, 0, carry);
+ MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
+ MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
+ MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
+ MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
+ MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
+ r3 += carry;
+ MP_ADD_CARRY(r1a, a4a, r1a, 0, carry);
+ MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
+ MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
+ MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
+ r3 += carry;
+
+ /* reduce out the carry */
+ while (r3) {
+ MP_ADD_CARRY(r0a, r3, r0a, 0, carry);
+ MP_ADD_CARRY(r0b, 0, r0b, carry, carry);
+ MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
+ MP_ADD_CARRY(r1b, 0, r1b, carry, carry);
+ MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
+ MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
+ r3 = carry;
+ }
+
+ /* check for final reduction */
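+ /*
+ * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
+ * 0xffffffffffffffff. That means we can only be over and need
+ * one more reduction
+ * if r2 == 0xffffffffffffffff (same as r2+1 == 0)
+ * and
+ * r1 == 0xffffffffffffffff or
+ * r1 == 0xfffffffffffffffe and r0 == 0xffffffffffffffff
+ * In all cases, we subtract the field (or add the 2's
+ * complement value (1,1,0)). (r0, r1, r2)
+ * Here each 64-bit rN is held as the 32-bit pair (rNb, rNa).
+ * NOTE(review): the code below adds 1 to r0 and zeroes r1 and r2
+ * instead of adding (1,1,0); the two agree except when r0 and r1 are
+ * both all ones, where the carry out of r0 is dropped -- verify.
+ */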
+ if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
+ && (r1b == 0xffffffff) ) &&
+ ((r1a == 0xffffffff) ||
+ (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
+ (r0b == 0xffffffff)) ) {
+ /* do a quick subtract */
+ MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
+ r0b += carry;
+ r1a = r1b = r2a = r2b = 0;
+ }
+
/* set the lower words of r */
if (a != r) {
- MP_CHECKOK(s_mp_pad(r, 7));
- MP_DIGIT(r, 5) = MP_DIGIT(a, 5);
- MP_DIGIT(r, 4) = MP_DIGIT(a, 4);
- MP_DIGIT(r, 3) = MP_DIGIT(a, 3);
- MP_DIGIT(r, 2) = MP_DIGIT(a, 2);
- MP_DIGIT(r, 1) = MP_DIGIT(a, 1);
- MP_DIGIT(r, 0) = MP_DIGIT(a, 0);
+ MP_CHECKOK(s_mp_pad(r, 6));
}
+ MP_DIGIT(r, 5) = r2b;
+ MP_DIGIT(r, 4) = r2a;
+ MP_DIGIT(r, 3) = r1b;
+ MP_DIGIT(r, 2) = r1a;
+ MP_DIGIT(r, 1) = r0b;
+ MP_DIGIT(r, 0) = r0a;
MP_USED(r) = 6;
- /* compute r = s1 + s2 + s3 + s4, where s1 = (a2,a1,a0), s2 =
- * (0,a3,a3), s3 = (a4,a4,0), and s4 = (a5,a5,a5), for
- * sixty-four-bit words */
- switch (a_used) {
- case 12:
- case 11:
- sa[5] = sa[3] = sa[1] = a11;
- sa[4] = sa[2] = sa[0] = a10;
- MP_CHECKOK(mp_add(r, &s, r));
- case 10:
- case 9:
- sa[5] = sa[3] = a9;
- sa[4] = sa[2] = a8;
- sa[1] = sa[0] = 0;
- MP_CHECKOK(mp_add(r, &s, r));
- case 8:
- case 7:
- sa[5] = sa[4] = 0;
- sa[3] = sa[1] = a7;
- sa[2] = sa[0] = a6;
- MP_CHECKOK(mp_add(r, &s, r));
- }
- /* there might be 1 or 2 bits left to reduce; use regular
- * reduction for this */
- MP_CHECKOK(mp_mod(r, &meth->irr, r));
- }
#else
- /* for polynomials larger than twice the field size, use regular
- * reduction */
- if (a_used > 6) {
- MP_CHECKOK(mp_mod(a, &meth->irr, r));
- } else {
- /* copy out upper words of a */
switch (a_used) {
case 6:
a5 = MP_DIGIT(a, 5);
@@ -153,39 +181,268 @@ ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
case 4:
a3 = MP_DIGIT(a, 3);
}
+
+ r2 = MP_DIGIT(a, 2);
+ r1 = MP_DIGIT(a, 1);
+ r0 = MP_DIGIT(a, 0);
+
+ /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
+#ifndef MPI_AMD64_ADD
+ MP_ADD_CARRY(r0, a3, r0, 0, carry);
+ MP_ADD_CARRY(r1, a3, r1, carry, carry);
+ MP_ADD_CARRY(r2, a4, r2, carry, carry);
+ r3 = carry;
+ MP_ADD_CARRY(r0, a5, r0, 0, carry);
+ MP_ADD_CARRY(r1, a5, r1, carry, carry);
+ MP_ADD_CARRY(r2, a5, r2, carry, carry);
+ r3 += carry;
+ MP_ADD_CARRY(r1, a4, r1, 0, carry);
+ MP_ADD_CARRY(r2, 0, r2, carry, carry);
+ r3 += carry;
+
+#else
+ r2 = MP_DIGIT(a, 2);
+ r1 = MP_DIGIT(a, 1);
+ r0 = MP_DIGIT(a, 0);
+
+ /* add (0,a3,a3), (a5,a5,a5) and (a4,a4,0) into (r2,r1,r0); r3 collects the carries */
+ __asm__ (
+ "xorq %3,%3 \n\t"
+ "addq %4,%0 \n\t"
+ "adcq %4,%1 \n\t"
+ "adcq %5,%2 \n\t"
+ "adcq $0,%3 \n\t"
+ "addq %6,%0 \n\t"
+ "adcq %6,%1 \n\t"
+ "adcq %6,%2 \n\t"
+ "adcq $0,%3 \n\t"
+ "addq %5,%1 \n\t"
+ "adcq $0,%2 \n\t"
+ "adcq $0,%3 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
+ "=r"(a4), "=r"(a5)
+ : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
+ "4" (a3), "5" (a4), "6"(a5)
+ : "%cc" );
+#endif
+
+ /* reduce out the carry */
+ while (r3) {
+#ifndef MPI_AMD64_ADD
+ MP_ADD_CARRY(r0, r3, r0, 0, carry);
+ MP_ADD_CARRY(r1, r3, r1, carry, carry);
+ MP_ADD_CARRY(r2, 0, r2, carry, carry);
+ r3 = carry;
+#else
+ a3=r3;
+ __asm__ (
+ "xorq %3,%3 \n\t"
+ "addq %4,%0 \n\t"
+ "adcq %4,%1 \n\t"
+ "adcq $0,%2 \n\t"
+ "adcq $0,%3 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
+ : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
+ : "%cc" );
+#endif
+ }
+
+ /* check for final reduction */
+ /*
+ * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
+ * 0xffffffffffffffff. That means we can only be over and need
+ * one more reduction
+ * if r2 == 0xffffffffffffffff (same as r2+1 == 0)
+ * and
+ * r1 == 0xffffffffffffffff or
+ * r1 == 0xfffffffffffffffe and r0 == 0xffffffffffffffff
+ * In all cases, we subtract the field (or add the 2's
+ * complement value (1,1,0)). (r0, r1, r2)
+ * NOTE(review): the code below does r0++ and zeroes r1 and r2
+ * instead of adding (1,1,0); the two agree except when r0 and r1 are
+ * both all ones, where the carry out of r0 is dropped -- verify.
+ */
+ if (r3 || ((r2 == MP_DIGIT_MAX) &&
+ ((r1 == MP_DIGIT_MAX) ||
+ ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
+ /* do a quick subtract */
+ r0++;
+ r1 = r2 = 0;
+ }
/* set the lower words of r */
if (a != r) {
- MP_CHECKOK(s_mp_pad(r, 4));
- MP_DIGIT(r, 2) = MP_DIGIT(a, 2);
- MP_DIGIT(r, 1) = MP_DIGIT(a, 1);
- MP_DIGIT(r, 0) = MP_DIGIT(a, 0);
+ MP_CHECKOK(s_mp_pad(r, 3));
}
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
MP_USED(r) = 3;
- /* compute r = s1 + s2 + s3 + s4, where s1 = (a2,a1,a0), s2 =
- * (0,a3,a3), s3 = (a4,a4,0), and s4 = (a5,a5,a5) */
- switch (a_used) {
- case 6:
- sa[2] = sa[1] = sa[0] = a5;
- MP_CHECKOK(mp_add(r, &s, r));
- case 5:
- sa[2] = sa[1] = a4;
- sa[0] = 0;
- MP_CHECKOK(mp_add(r, &s, r));
- case 4:
- sa[2] = 0;
- sa[1] = sa[0] = a3;
- MP_CHECKOK(mp_add(r, &s, r));
- }
- /* there might be 1 or 2 bits left to reduce; use regular
- * reduction for this */
- MP_CHECKOK(mp_mod(r, &meth->irr, r));
+#endif
}
+
+ CLEANUP:
+ return res;
+}
+
+#ifndef ECL_THIRTY_TWO_BIT
+/* Compute the sum of two field elements of a 192-bit curve: r = a + b,
+ * followed by one conditional subtraction of the field prime
+ * p192 = 2^192 - 2^64 - 1. The work is done in-line since the number of
+ * words is so small that we don't want the overhead of mp function
+ * calls. Uses optimized modular reduction for p192.
+ *
+ * Only the low three digits of a and b are read, and only when MP_USED
+ * is 1, 2 or 3; for any other MP_USED the switch matches nothing and the
+ * operand is treated as zero (presumably callers always pass reduced
+ * field elements -- TODO confirm). meth is not used here.
+ * r is padded to three digits, marked non-negative and clamped.
+ * Returns res, which starts as MP_OKAY; MP_CHECKOK presumably stores a
+ * failure from s_mp_pad in res and jumps to CLEANUP (macro not shown).
+ *
+ * NOTE(review): in the MPI_AMD64_ADD asm the carry output (%3) is written
+ * by the leading xorq before the "r" inputs %4-%6 are read, yet it is
+ * declared "=r" rather than early-clobber "=&r"; confirm the compiler
+ * cannot place carry in the same register as a0, a1 or a2.
+ */
+mp_err
+ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit a0 = 0, a1 = 0, a2 = 0; /* digits of a, least significant first */
+ mp_digit r0 = 0, r1 = 0, r2 = 0; /* loaded with the digits of b, then hold the sum */
+ mp_digit carry;
+
+ switch(MP_USED(a)) {
+ case 3:
+ a2 = MP_DIGIT(a,2); /* fall through */
+ case 2:
+ a1 = MP_DIGIT(a,1); /* fall through */
+ case 1:
+ a0 = MP_DIGIT(a,0);
+ }
+ switch(MP_USED(b)) {
+ case 3:
+ r2 = MP_DIGIT(b,2); /* fall through */
+ case 2:
+ r1 = MP_DIGIT(b,1); /* fall through */
+ case 1:
+ r0 = MP_DIGIT(b,0);
+ }
+
+#ifndef MPI_AMD64_ADD /* carry-chain macros */
+ MP_ADD_CARRY(a0, r0, r0, 0, carry);
+ MP_ADD_CARRY(a1, r1, r1, carry, carry);
+ MP_ADD_CARRY(a2, r2, r2, carry, carry);
+#else /* same three-digit addition as inline asm; carry ends up in %3 */
+ __asm__ (
+ "xorq %3,%3 \n\t"
+ "addq %4,%0 \n\t"
+ "adcq %5,%1 \n\t"
+ "adcq %6,%2 \n\t"
+ "adcq $0,%3 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
+ : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
+ "1" (r1), "2" (r2)
+ : "%cc" );
+#endif
+
+ /* Do quick 'subtract' if we've gone over: either the addition carried
+ * out of the top digit, or (r2,r1,r0) is at least the field prime
+ * (all ones, all ones minus one, all ones). Subtracting the prime
+ * modulo 2^192 is the same as adding its 2's complement, 2^64 + 1,
+ * i.e. the digits (1,1,0) in (r0,r1,r2) order
+ * (add the 2's complement of the curve field) */
+ if (carry || ((r2 == MP_DIGIT_MAX) &&
+ ((r1 == MP_DIGIT_MAX) ||
+ ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
+#ifndef MPI_AMD64_ADD
+ MP_ADD_CARRY(r0, 1, r0, 0, carry);
+ MP_ADD_CARRY(r1, 1, r1, carry, carry);
+ MP_ADD_CARRY(r2, 0, r2, carry, carry);
+#else
+ __asm__ (
+ "addq $1,%0 \n\t"
+ "adcq $1,%1 \n\t"
+ "adcq $0,%2 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2)
+ : "0" (r0), "1" (r1), "2" (r2)
+ : "%cc" );
+#endif
+ }
+
+
+ MP_CHECKOK(s_mp_pad(r, 3)); /* make sure r has room for three digits */
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 3;
+ s_mp_clamp(r); /* presumably trims leading zero digits -- helper not shown */
+
+
+ CLEANUP:
+ return res;
+}
+
+/* Compute the difference of two field elements of a 192-bit curve:
+ * r = a - b, followed by one conditional addition of the field prime
+ * p192 = 2^192 - 2^64 - 1 when the subtraction borrowed. The work is done
+ * in-line since the number of words is so small that we don't want the
+ * overhead of mp function calls. Uses optimized modular reduction for
+ * p192.
+ *
+ * Only the low three digits of a and b are read, and only when MP_USED
+ * is 1, 2 or 3; for any other MP_USED the switch matches nothing and the
+ * operand is treated as zero (presumably callers always pass reduced
+ * field elements -- TODO confirm). meth is not used here.
+ * r is padded to three digits, marked non-negative and clamped.
+ * Returns res, which starts as MP_OKAY; MP_CHECKOK presumably stores a
+ * failure from s_mp_pad in res and jumps to CLEANUP (macro not shown).
+ *
+ * NOTE(review): in the MPI_AMD64_ADD asm the borrow output (%3) is
+ * written by the leading xorq before the "r" inputs %4-%6 are read, yet
+ * it is declared "=r" rather than early-clobber "=&r"; confirm the
+ * compiler cannot place borrow in the same register as b0, b1 or b2.
+ */
+mp_err
+ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit b0 = 0, b1 = 0, b2 = 0; /* digits of b, least significant first */
+ mp_digit r0 = 0, r1 = 0, r2 = 0; /* loaded with the digits of a, then hold the difference */
+ mp_digit borrow;
+
+ switch(MP_USED(a)) {
+ case 3:
+ r2 = MP_DIGIT(a,2); /* fall through */
+ case 2:
+ r1 = MP_DIGIT(a,1); /* fall through */
+ case 1:
+ r0 = MP_DIGIT(a,0);
+ }
+
+ switch(MP_USED(b)) {
+ case 3:
+ b2 = MP_DIGIT(b,2); /* fall through */
+ case 2:
+ b1 = MP_DIGIT(b,1); /* fall through */
+ case 1:
+ b0 = MP_DIGIT(b,0);
+ }
+
+#ifndef MPI_AMD64_ADD /* borrow-chain macros */
+ MP_SUB_BORROW(r0, b0, r0, 0, borrow);
+ MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
+ MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
+#else /* same three-digit subtraction as inline asm; the final adcq copies the borrow flag into %3 */
+ __asm__ (
+ "xorq %3,%3 \n\t"
+ "subq %4,%0 \n\t"
+ "sbbq %5,%1 \n\t"
+ "sbbq %6,%2 \n\t"
+ "adcq $0,%3 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
+ : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
+ "1" (r1), "2" (r2)
+ : "%cc" );
+#endif
+
+ /* Do quick 'add' if we've gone under 0: adding the field prime modulo
+ * 2^192 is the same as subtracting 2^64 + 1, i.e. the digits (1,1,0)
+ * in (r0,r1,r2) order
+ * (subtract the 2's complement of the curve field) */
+ if (borrow) {
+#ifndef MPI_AMD64_ADD
+ MP_SUB_BORROW(r0, 1, r0, 0, borrow);
+ MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
+ MP_SUB_BORROW(r2, 0, r2, borrow, borrow);
+#else
+ __asm__ (
+ "subq $1,%0 \n\t"
+ "sbbq $1,%1 \n\t"
+ "sbbq $0,%2 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2)
+ : "0" (r0), "1" (r1), "2" (r2)
+ : "%cc" );
#endif
+ }
+
+ MP_CHECKOK(s_mp_pad(r, 3)); /* make sure r has room for three digits */
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 3;
+ s_mp_clamp(r); /* presumably trims leading zero digits -- helper not shown */
CLEANUP:
return res;
}
+#endif
+
/* Compute the square of polynomial a, reduce modulo p192. Store the
* result in r. r could be a. Uses optimized modular reduction for p192.
*/
@@ -215,6 +472,31 @@ ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
return res;
}
+/* Divides two field elements. If a is NULL, then returns the inverse of
+ * b.
+ *
+ * With a == NULL the result of mp_invmod(b, &meth->irr, r) is returned
+ * directly. Otherwise the inverse of b modulo meth->irr is computed into
+ * the temporary t, multiplied by a into r with mp_mul, and r is then
+ * reduced in place with ec_GFp_nistp192_mod. t is released with mp_clear
+ * before returning res.
+ *
+ * NOTE(review): CLEANUP is presumably also reached when mp_init(&t)
+ * itself fails (MP_CHECKOK is not shown here); confirm mp_clear is safe
+ * on t in that case. */
+mp_err
+ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_int t; /* holds the inverse of b; only initialised on the a != NULL path */
+
+ /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+ if (a == NULL) {
+ return mp_invmod(b, &meth->irr, r);
+ } else {
+ /* MPI doesn't support divmod, so we implement it using invmod and
+ * mulmod. */
+ MP_CHECKOK(mp_init(&t));
+ MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
+ MP_CHECKOK(mp_mul(a, &t, r)); /* unreduced product a * b^-1 */
+ MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); /* reduce the product in place */
+ CLEANUP:
+ mp_clear(&t);
+ return res;
+ }
+}
+
/* Wire in fast field arithmetic and precomputation of base point for
* named curves. */
mp_err
@@ -224,6 +506,11 @@ ec_group_set_gfp192(ECGroup *group, ECCurveName name)
group->meth->field_mod = &ec_GFp_nistp192_mod;
group->meth->field_mul = &ec_GFp_nistp192_mul;
group->meth->field_sqr = &ec_GFp_nistp192_sqr;
+ group->meth->field_div = &ec_GFp_nistp192_div;
+#ifndef ECL_THIRTY_TWO_BIT
+ group->meth->field_add = &ec_GFp_nistp192_add;
+ group->meth->field_sub = &ec_GFp_nistp192_sub;
+#endif
}
return MP_OKAY;
}