summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrelyea%netscape.com <devnull@localhost>2005-11-11 19:53:26 +0000
committerrelyea%netscape.com <devnull@localhost>2005-11-11 19:53:26 +0000
commit885fa706d4fc6806355c184035d4c3553b57a5e2 (patch)
tree95f25a28a5d924c7ad4ee9b8f0995f637ebb8d20
parent146060d17ac1ee1fabef61661ce40bb4f869f694 (diff)
downloadnss-hg-885fa706d4fc6806355c184035d4c3553b57a5e2.tar.gz
Commit the previously reviewed version of mpmontg.c to the TMP branch as a
baseline for the 'final' version.
-rw-r--r--security/nss/lib/freebl/mpi/mpmontg.c480
1 files changed, 350 insertions, 130 deletions
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c
index cc4d233fd..13a95b172 100644
--- a/security/nss/lib/freebl/mpi/mpmontg.c
+++ b/security/nss/lib/freebl/mpi/mpmontg.c
@@ -50,6 +50,7 @@
/* #define MP_USING_MONT_MULF 1 */
#define MP_USING_CACHE_SAFE_MOD_EXP 1
#define MP_USING_WEAVE_COPY 1
+#define MP_CHAR_STORE_SLOW 1
#include <string.h>
#include "mpi-priv.h"
#include "mp_gf2m-priv.h"
@@ -58,10 +59,15 @@
#ifdef MP_USING_MONT_MULF
#include "montmulf.h"
#endif
-#include "prtypes.h"
+#include <stddef.h> /* ptrdiff_t */
-#include <fcntl.h>
-#include <unistd.h>
+/* need to know endianness of this platform. If we aren't told, get it from
+ * nspr... */
+#ifdef MP_CHAR_STORE_SLOW
+#if !defined(IS_BIG_ENDIAN) && !defined(IS_LITTLE_ENDIAN)
+#include "prcpucfg.h"
+#endif
+#endif
#define STATIC
/* #define DEBUG 1 */
@@ -70,8 +76,8 @@
#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
#define MAX_POWERS MAX_ODD_INTS*2
#define MAX_MODULUS_BITS 8192
-#define MAX_MODULUS_LENGTH (MAX_MODULUS_BITS/8)
-#define MAX_MODULUS_DIGITS (MAX_MODULUS_LENGTH/sizeof(mp_digit))
+#define MAX_MODULUS_BYTES (MAX_MODULUS_BITS/8)
+#define MAX_MODULUS_DIGITS (MAX_MODULUS_BYTES/sizeof(mp_digit))
#if defined(_WIN32_WCE)
#define ABORT res = MP_UNDEF; goto CLEANUP
@@ -523,77 +529,97 @@ CLEANUP:
#undef MUL
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
-
unsigned int mp_using_cache_safe_exp = 1;
+#endif
-void mp_set_mode_modify() { mp_using_cache_safe_exp = 0; }
-void mp_set_mode_safe() { mp_using_cache_safe_exp = 1; }
+/*
+ * Select the modular exponentiation mode.
+ *
+ * When the file is built with MP_USING_CACHE_SAFE_MOD_EXP the value is
+ * stored in mp_using_cache_safe_exp and MP_OKAY is returned. Without that
+ * build option there is nothing to switch, so only a value of 0 is
+ * accepted (MP_OKAY); any other value yields MP_BADARG.
+ */
+mp_err mp_set_modexp_mode(int value)
+{
+#ifndef MP_USING_CACHE_SAFE_MOD_EXP
+    /* no cache-safe code compiled in: only mode 0 can be honoured */
+    return (value == 0) ? MP_OKAY : MP_BADARG;
+#else
+    mp_using_cache_safe_exp = value;
+    return MP_OKAY;
+#endif
+}
+
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
#ifndef MP_USING_WEAVE_COPY
-#if MP_DIGIT_BITS == 32
+/*
+ * Building blocks used to assemble WEAVE_FETCH(bi, b, count):
+ *   WEAVE_BASE_INIT  declares the scratch variable the steps share
+ *   WEAVE_FIRST      handles the first weaved byte of a digit
+ *   WEAVE_MIDDLE     handles each byte between the first and the last
+ *   WEAVE_LAST       handles the final byte (and, in the slow-store
+ *                    variant, writes the finished digit to *bi)
+ * Every step reads one byte at b and then advances b by count.
+ * Macro arguments are parenthesized so that compound expressions can be
+ * passed safely.
+ */
+#ifndef MP_CHAR_STORE_SLOW
+/* byte stores are cheap: copy each byte straight into the digit at bi */
+#define WEAVE_BASE_INIT \
+ unsigned char *_ptr;
+
+#define WEAVE_FIRST(bi,b,count) \
+ _ptr = (unsigned char *)(bi); \
+ *_ptr++ = *(b); (b) += (count);
+
+#define WEAVE_MIDDLE(bi,b,count) \
+ *_ptr++ = *(b); (b) += (count);
+
+#define WEAVE_LAST(bi,b,count) \
+ *_ptr++ = *(b); (b) += (count);
+
+#else
+/* byte stores are slow: accumulate the bytes in a register, first byte
+ * most significant, and store the whole digit once */
+#define WEAVE_BASE_INIT \
+ register mp_digit _digit;
+
+#define WEAVE_FIRST(bi,b,count) \
+ _digit = *(b) << 8; (b) += (count);
+
+#define WEAVE_MIDDLE(bi,b,count) \
+ _digit |= *(b); (b) += (count); _digit = _digit << 8;
+
+#define WEAVE_LAST(bi,b,count) \
+ _digit |= *(b); (b) += (count); \
+ *(bi) = _digit;
+#endif /* MP_CHAR_STORE_SLOW */
+
+/*
+ * WEAVE_INIT declares the scratch state and WEAVE_FETCH(bi, b, count)
+ * gathers one mp_digit into *bi from sizeof(mp_digit) weaved bytes that
+ * sit 'count' bytes apart starting at b; b is advanced past them.
+ * The 32 and 64 bit digit sizes are unrolled by hand, any other digit
+ * size falls back to a loop: one FIRST step, sizeof(mp_digit)-2 MIDDLE
+ * steps and one LAST step.
+ */
+#if MP_DIGIT_BITS == 32
#define WEAVE_INIT \
- unsigned char *_ptr;
+ WEAVE_BASE_INIT
#define WEAVE_FETCH(bi, b, count) \
- _ptr = (unsigned char *)bi; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count;
-
-#define WEAVE_PUT(bi, b, count) \
- _ptr = (unsigned char *)bi; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count;
+ WEAVE_FIRST(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_LAST(bi,b,count)
+
#else
-#if MP_DIGIT_BITS == 64
+#if MP_DIGIT_BITS == 64
+
#define WEAVE_INIT \
- unsigned char *_ptr
+ WEAVE_BASE_INIT
#define WEAVE_FETCH(bi, b, count) \
- _ptr = (unsigned char *)bi; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count; \
- *_ptr++ = *b; b+= count;
-
-#define WEAVE_PUT(bi, b, count) \
- _ptr = (unsigned char *)bi; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count; \
- *b = *_ptr++; b+= count;
+ WEAVE_FIRST(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_MIDDLE(bi,b,count) \
+ WEAVE_LAST(bi,b,count)
+
#else
#define WEAVE_INIT \
- int _i; \
- unsigned char *_ptr;
+ int _i; \
+ WEAVE_BASE_INIT
- /* It would be nice to unroll this loop as well */
+ /* It would be nice to unroll this loop as well */
#define WEAVE_FETCH(bi, b, count) \
- _ptr = (unsigned char *)bi; \
- for (_i=0; _i < sizeof mp_digit ; _i++) { \
- *_ptr++ = *b; \
- b+=count; \
- }
-
-#define WEAVE_PUT(bi, b, count) \
- _ptr = (unsigned char *)bi; \
- for (_i=0; _i < sizeof mp_digit ; _i++) { \
- *b = *_ptr++; \
- b+=count; \
- }
-#endif
+ WEAVE_FIRST(bi,b,count) \
+ for (_i=1; _i < sizeof(mp_digit) -1 ; _i++) { \
+ WEAVE_MIDDLE(bi,b,count) \
+ } \
+ WEAVE_LAST(bi,b,count)
+
+#endif
#endif
#if !defined(MP_MONT_USE_MP_MUL)
@@ -702,64 +728,241 @@ CLEANUP:
} /* end mp_mul() */
#endif /* MP_USING_WEAVE_COPY */
+#define WEAVE_WORD_SIZE 4
+
+#ifndef MP_CHAR_STORE_SLOW
+/*
+ * Byte-store variant of mpi_to_weave.
+ *
+ * Interleaves the WEAVE_WORD_SIZE mp_ints a[0..WEAVE_WORD_SIZE-1] into b.
+ * The bytes of a[i] are copied, in the order they sit in its digit array,
+ * to b[i], b[i+count], b[i+2*count], ...; the digits between the used
+ * length and b_size are written as zero entries at the same stride.
+ *
+ * Each a[i] must have sign 0 and at most b_size used digits; both
+ * conditions are checked through ARGCHK(..., MP_BADARG) just before that
+ * value is copied. Returns MP_OKAY once all values have been written.
+ */
+mp_err mpi_to_weave(const mp_int *a, unsigned char *b,
+                    mp_size b_size, mp_size count)
+{
+  mp_size which;
+
+  for (which = 0; which < WEAVE_WORD_SIZE; which++) {
+    const mp_int *src = &a[which];
+    mp_size used = MP_USED(src);
+    unsigned char *in = (unsigned char *)MP_DIGITS(src);
+    unsigned char *stop = in + (used * sizeof(mp_digit));
+    unsigned char *out = b + which;
+    mp_size pad;
+
+    ARGCHK(MP_SIGN(src) == 0, MP_BADARG);
+    ARGCHK(used <= b_size, MP_BADARG);
+
+    /* scatter the significant bytes, one every 'count' bytes */
+    while (in < stop) {
+      *out = *in++;
+      out += count;
+    }
+
+    /* pad the unused high digits, one zero entry per missing digit */
+    for (pad = b_size - used; pad > 0; pad--) {
+      *out = 0;
+      out += count;
+    }
+  }
+
+  return MP_OKAY;
+}
+#else
+/* Need a primitive that we know is 32 bits long: use unsigned int when
+ * UINT_MAX matches MP_32BIT_MAX, otherwise unsigned long when ULONG_MAX
+ * does, otherwise refuse to compile. */
+#if UINT_MAX == MP_32BIT_MAX
+typedef unsigned int mp_weave_word;
+#else
+#if ULONG_MAX == MP_32BIT_MAX
+typedef unsigned long mp_weave_word;
+#else
+#error "Can't find 32 bit primitive type for this platform"
+#endif
+#endif
+
+/*
+ * On some platforms character stores into memory are very expensive since
+ * they generate a read/modify/write operation on the bus. On those platforms
+ * we need to do integer writes to the bus.
+ *
+ * The weave_to_mpi function in those cases expects the data to be laid out
+ * in big endian, interleaved.
+ *
+ * Since we need to interleave on a byte by byte basis, we need to collect
+ * several mpi structures together into a single uint32 before we write. We
+ * also need to make sure the uint32 is arranged so that the first value of
+ * the first array winds up in b[0]. This means construction of that uint32
+ * is endian specific (even though the layout of the array is always big
+ * endian).
+ *
+ * a       array of 4 (WEAVE_WORD_SIZE) mp_ints, each with sign 0 and at
+ *         most b_size used digits (checked with ARGCHK, MP_BADARG)
+ * b       destination; it is written through an mp_weave_word pointer
+ *         (presumably the caller provides suitable alignment - verify)
+ * b_size  number of digits emitted per mp_int; digits at or beyond a
+ *         value's used length are emitted as 0
+ * count   byte stride between consecutive weaved words; it is divided by
+ *         sizeof(mp_weave_word) to get the stride in words
+ *
+ * Returns MP_OKAY after all b_size digits have been written.
+ */
mp_err mpi_to_weave(const mp_int *a, unsigned char *b,
mp_size b_size, mp_size count)
{
mp_size i;
- unsigned char *pb = (unsigned char *)MP_DIGITS(a);
- mp_size useda = MP_USED(a);
- mp_size zero = b_size - useda;
- unsigned char *end = pb+ (useda*sizeof(mp_digit));
+ mp_digit *digitsa0;
+ mp_digit *digitsa1;
+ mp_digit *digitsa2;
+ mp_digit *digitsa3;
+ mp_size useda0;
+ mp_size useda1;
+ mp_size useda2;
+ mp_size useda3;
+ mp_weave_word *weaved = (mp_weave_word *)b;
+#if MP_DIGIT_BITS != 32 && MP_DIGIT_BITS != 64
+ mp_size j;
+#endif
- ARGCHK(MP_SIGN(a) == 0, MP_BADARG);
- ARGCHK(useda <= b_size, MP_BADARG);
+ count = count/sizeof(mp_weave_word);
+
+ /* this code pretty much depends on this ! */
+ /*assert(WEAVE_WORD_SIZE == 4); */
+
+ digitsa0 = MP_DIGITS(&a[0]);
+ digitsa1 = MP_DIGITS(&a[1]);
+ digitsa2 = MP_DIGITS(&a[2]);
+ digitsa3 = MP_DIGITS(&a[3]);
+ useda0 = MP_USED(&a[0]);
+ useda1 = MP_USED(&a[1]);
+ useda2 = MP_USED(&a[2]);
+ useda3 = MP_USED(&a[3]);
+
+ ARGCHK(MP_SIGN(&a[0]) == 0, MP_BADARG);
+ ARGCHK(MP_SIGN(&a[1]) == 0, MP_BADARG);
+ ARGCHK(MP_SIGN(&a[2]) == 0, MP_BADARG);
+ ARGCHK(MP_SIGN(&a[3]) == 0, MP_BADARG);
+ ARGCHK(useda0 <= b_size, MP_BADARG);
+ ARGCHK(useda1 <= b_size, MP_BADARG);
+ ARGCHK(useda2 <= b_size, MP_BADARG);
+ ARGCHK(useda3 <= b_size, MP_BADARG);
+
+/* fetch digit[word], or 0 when word is at or past the used length */
+#define SAFE_FETCH(digit, used, word) ((word) < (used) ? (digit)[word] : 0)
+
+ for (i=0; i < b_size; i++) {
+ mp_digit d0 = SAFE_FETCH(digitsa0,useda0,i);
+ mp_digit d1 = SAFE_FETCH(digitsa1,useda1,i);
+ mp_digit d2 = SAFE_FETCH(digitsa2,useda2,i);
+ mp_digit d3 = SAFE_FETCH(digitsa3,useda3,i);
+ register mp_weave_word acc;
- for (; pb < end; pb++) {
- *b = *pb;
- b += count;
- }
- for (i=0; i < zero; i++) {
- *b = 0;
- b += count;
+/*
+ * ONE_STEP takes the MSB of each of our current digits and places that
+ * byte in the appropriate position for writing to the weaved array.
+ * On little endian:
+ * b3 b2 b1 b0
+ * On big endian:
+ * b0 b1 b2 b3
+ * When the data is written it would always wind up:
+ * b[0] = b0
+ * b[1] = b1
+ * b[2] = b2
+ * b[3] = b3
+ *
+ * Once we've written the MSB, we shift the whole digit left one
+ * byte, putting the Next Most Significant Byte in the MSB position,
+ * so when we repeat the step that byte will be written.
+ *
+ * NOTE(review): the shift counts below assume MP_DIGIT_BITS >= 32; the
+ * generic digit-size loop further down would need different shifts for
+ * narrower digits - confirm before enabling such a build.
+ */
+#if defined(IS_LITTLE_ENDIAN)
+#define MPI_WEAVE_ONE_STEP \
+ acc = (d0 >> (MP_DIGIT_BITS-8)) & 0xff ; d0 <<= 8; /*b0*/ \
+ acc |= (d1 >> (MP_DIGIT_BITS-16)) & 0xff00 ; d1 <<= 8; /*b1*/ \
+ acc |= (d2 >> (MP_DIGIT_BITS-24)) & 0xff0000 ; d2 <<= 8; /*b2*/ \
+ acc |= (d3 >> (MP_DIGIT_BITS-32)) & 0xff000000; d3 <<= 8; /*b3*/ \
+ *weaved = acc; weaved += count;
+#elif defined(IS_BIG_ENDIAN)
+#define MPI_WEAVE_ONE_STEP \
+ acc = (d0 >> (MP_DIGIT_BITS-32)) & 0xff000000; d0 <<= 8; /*b0*/ \
+ acc |= (d1 >> (MP_DIGIT_BITS-24)) & 0xff0000 ; d1 <<= 8; /*b1*/ \
+ acc |= (d2 >> (MP_DIGIT_BITS-16)) & 0xff00 ; d2 <<= 8; /*b2*/ \
+ acc |= (d3 >> (MP_DIGIT_BITS-8)) & 0xff ; d3 <<= 8; /*b3*/ \
+ *weaved = acc; weaved += count;
+#else
+#error "Neither IS_LITTLE_ENDIAN nor IS_BIG_ENDIAN is defined"
+#endif
+
+#if MP_DIGIT_BITS == 32 || MP_DIGIT_BITS == 64
+ MPI_WEAVE_ONE_STEP
+ MPI_WEAVE_ONE_STEP
+ MPI_WEAVE_ONE_STEP
+ MPI_WEAVE_ONE_STEP
+#if MP_DIGIT_BITS == 64
+ MPI_WEAVE_ONE_STEP
+ MPI_WEAVE_ONE_STEP
+ MPI_WEAVE_ONE_STEP
+ MPI_WEAVE_ONE_STEP
+#endif
+#else
+ for (j=0; j < sizeof (mp_digit); j++) {
+ MPI_WEAVE_ONE_STEP
+ }
+#endif
}
return MP_OKAY;
}
+#endif
#ifdef MP_USING_WEAVE_COPY
+#ifndef MP_CHAR_STORE_SLOW
+/*
+ * Byte-store variant: read one weaved value back into a.
+ * Copies b_size*sizeof(mp_digit) bytes, taken every 'count' bytes starting
+ * at b, into a's digit array in memory order, and sets a's sign to 0 and
+ * its used length to b_size. No check is made here that a has room for
+ * b_size digits. Always returns MP_OKAY.
+ */
mp_err weave_to_mpi(mp_int *a, const unsigned char *b,
mp_size b_size, mp_size count)
{
- unsigned char *pb = (unsigned char *)MP_DIGITS(a);
+ unsigned char *pb = (unsigned char *)MP_DIGITS(a);
unsigned char *end = pb+ (b_size*sizeof(mp_digit));
MP_SIGN(a) = 0;
MP_USED(a) = b_size;
- for (; pb < end; pb++) {
+ for (; pb < end; b+=count, pb++) {
*pb = *b;
- b += count;
}
return MP_OKAY;
}
+#else
+/*
+ * Word-store variant: read one weaved value back into a.
+ * Each digit is rebuilt in a register from sizeof(mp_digit) bytes taken
+ * every 'count' bytes starting at b, the first byte read becoming the
+ * most significant, and is then stored with a single digit write. Sets
+ * a's sign to 0 and its used length to b_size. No check is made here that
+ * a has room for b_size digits. Always returns MP_OKAY.
+ */
+mp_err weave_to_mpi(mp_int *a, const unsigned char *b,
+ mp_size b_size, mp_size count)
+{
+ mp_digit *pb = MP_DIGITS(a);
+ mp_digit *end = &pb[b_size];
+#if MP_DIGIT_BITS != 32 && MP_DIGIT_BITS != 64
+ mp_size i; /* only the generic digit-size loop below needs a counter */
+#endif
+
+ MP_SIGN(a) = 0;
+ MP_USED(a) = b_size;
+
+ for (; pb < end; pb++) {
+ register mp_digit digit;
+
+ digit = *b << 8; b += count;
+#if MP_DIGIT_BITS == 32 || MP_DIGIT_BITS == 64
+ digit |= *b; b += count; digit = digit << 8;
+ digit |= *b; b += count; digit = digit << 8;
+#if MP_DIGIT_BITS == 64
+ digit |= *b; b += count; digit = digit << 8;
+ digit |= *b; b += count; digit = digit << 8;
+ digit |= *b; b += count; digit = digit << 8;
+ digit |= *b; b += count; digit = digit << 8;
+#endif
+#else
+ for (i=1; i < sizeof(mp_digit)-1; i++) {
+ digit |= *b; b += count; digit = digit << 8;
+ }
#endif
+ digit |= *b; b += count;
+
+ *pb = digit;
+ }
+ return MP_OKAY;
+}
+#endif
+#endif /* MP_USING_WEAVE_COPY */
#define SQR(a,b) \
MP_CHECKOK( mp_sqr(a, b) );\
MP_CHECKOK( s_mp_redc(b, mmm) );
-#ifdef MP_USING_WEAVE_COPY
#if defined(MP_MONT_USE_MP_MUL)
-#define MUL(x,a,b) \
- MP_CHECKOK( weave_to_mpi(&tmp, powers + (x), nLen, num_powers) ); \
- MP_CHECKOK( mp_mul_weave(a, &tmp, b) ); \
+#define MUL_NOWEAVE(x,a,b) \
+ MP_CHECKOK( mp_mul(a, x, b) ); \
MP_CHECKOK( s_mp_redc(b, mmm) ) ;
#else
+#define MUL_NOWEAVE(x,a,b) \
+ MP_CHECKOK( s_mp_mul_mont(a, x, b, mmm) );
+#endif
+
+#ifdef MP_USING_WEAVE_COPY
#define MUL(x,a,b) \
MP_CHECKOK( weave_to_mpi(&tmp, powers + (x), nLen, num_powers) ); \
- MP_CHECKOK( s_mp_mul_mont(a, &tmp, b, mmm) );
-#endif
+ MUL_NOWEAVE(&tmp,a,b)
#else
#if defined(MP_MONT_USE_MP_MUL)
#define MUL(x,a,b) \
@@ -769,7 +972,7 @@ mp_err weave_to_mpi(mp_int *a, const unsigned char *b,
#define MUL(x,a,b) \
MP_CHECKOK( s_mp_mul_mont_weave(a, powers + (x), nLen, num_powers, b, mmm) );
#endif
-#endif
+#endif /* MP_USING_WEAVE_COPY */
#define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
#define MP_ALIGN(x,y) ((((ptrdiff_t)(x))+((y)-1))&(~((y)-1)))
@@ -786,15 +989,15 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase,
mp_size num_powers)
{
mp_int *pa1, *pa2, *ptmp;
- mp_size i, j;
+ mp_size i;
mp_size first_window;
mp_err res;
int expOff;
- mp_int accum1, accum2;
+ mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
#ifdef MP_USING_WEAVE_COPY
mp_int tmp;
#endif
- unsigned char powersArray[MAX_POWERS * (MAX_MODULUS_LENGTH+1)];
+ unsigned char powersArray[MAX_POWERS * (MAX_MODULUS_BYTES+1)];
unsigned char *powers;
ARGCHK( nLen <= MAX_MODULUS_DIGITS , MP_BADARG);
@@ -804,6 +1007,10 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase,
MP_DIGITS(&accum1) = 0;
MP_DIGITS(&accum2) = 0;
+ MP_DIGITS(&accum[0]) = 0;
+ MP_DIGITS(&accum[1]) = 0;
+ MP_DIGITS(&accum[2]) = 0;
+ MP_DIGITS(&accum[3]) = 0;
/* grab the first window value. This allows us to preload accumulator1
* and save a conversion, some squares and a multiple*/
@@ -811,62 +1018,75 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase,
bits_in_exponent-window_bits, window_bits) );
first_window = (mp_size)res;
- MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
+ MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) );
+ MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) );
+ MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) );
+ MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) );
MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
+ MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
#ifdef MP_USING_WEAVE_COPY
MP_DIGITS(&tmp) = 0;
MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) );
#endif
- mp_set(&accum2, 1);
- MP_CHECKOK( s_mp_to_mont(&accum2, mmm, &accum2) );
- /* unlike mp_copy_init, mp_copy is from, to */
- /* can this be an assert? If we are clamped, we shouldn't ever have a case
- * where the first window is '0' */
- if (first_window == 0) {
- MP_CHECKOK( mp_copy(&accum2, &accum1) );
- }
- MP_CHECKOK( mpi_to_weave(&accum2, powers, nLen, num_powers) );
-
- MP_CHECKOK( mp_copy(montBase, &accum2) );
- if (first_window == 1) {
- MP_CHECKOK( mp_copy(&accum2, &accum1) );
+ /* build the first 4 powers inline */
+ if (num_powers > 2) {
+ mp_set(&accum[0], 1);
+ MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) );
+ MP_CHECKOK( mp_copy(montBase, &accum[1]) );
+ SQR(montBase, &accum[2]);
+ MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
+ MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) );
+ if (first_window < 4) {
+ MP_CHECKOK( mp_copy(&accum[first_window], &accum1) );
+ first_window = num_powers;
+ }
+ } else {
+ /* assert first_window == 1? */
+ MP_CHECKOK( mp_copy(montBase, &accum1) );
}
- MP_CHECKOK( mpi_to_weave(&accum2, powers+1, nLen, num_powers) );
/* this adds 2**(k-1)-2 square operations over just calculating the
* odd powers where k is the window size. We will get some of that
* back by not needing the first 'N' squares for the window (though
* squaring 1 is extremely fast, so it's not much savings) */
-
- /* This loop is like this so we can calculate all the powers with only 1
- * temp variable. This saves us from needing a weaved square routine.
- */
- for (i = 2; i < num_powers; i++) {
- if (i == 2 ) {
- MP_CHECKOK( mp_sqr(&accum2, &accum2) );
- MP_CHECKOK( s_mp_redc(&accum2, mmm) );
- if (first_window == i) {
- MP_CHECKOK( mp_copy(&accum2, &accum1) );
- }
- MP_CHECKOK( mpi_to_weave(&accum2, powers+i, nLen, num_powers) );
- } else if ( i & 1 ) {
- MUL(i-1, montBase, &accum2);
- if (first_window == i) {
- MP_CHECKOK( mp_copy(&accum2, &accum1) );
- }
- MP_CHECKOK( mpi_to_weave(&accum2, powers+i, nLen, num_powers) );
+ for (i = 4; i < num_powers; i++) {
+ int acc_index = i & 0x3; /* i % 4 */
+ if ( i & 1 ) {
+ MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]);
+ /* we've filled the array; do our 'per array' processing */
+ if (acc_index == 3) {
+ MP_CHECKOK( mpi_to_weave(accum, powers + i - 3, nLen, num_powers) );
+
+ if (first_window <= i) {
+ MP_CHECKOK( mp_copy(&accum[first_window & 0x3], &accum1) );
+ first_window = num_powers;
+ }
+ }
} else {
- continue;
- }
- for (j=i*2; j < num_powers; j *= 2) {
- MP_CHECKOK( mp_sqr(&accum2, &accum2) );
- MP_CHECKOK( s_mp_redc(&accum2, mmm) );
- if (first_window == j) {
- MP_CHECKOK( mp_copy(&accum2, &accum1) );
+ /* up to 8 we can find 2^i-1 in the accum array, but at 8 our source
+ * and target are the same, so we need to copy. After that, the
+ * value is overwritten, so we need to fetch it from the stored
+ * weave array */
+ if (i > 8) {
+#ifdef MP_USING_WEAVE_COPY
+ MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers));
+ SQR(&accum2, &accum[acc_index]);
+#else
+ int prev_index = (acc_index - 1) & 0x3;
+ MUL_NOWEAVE(montBase, &accum[prev_index] , &accum[acc_index]);
+#endif
+ } else {
+ int half_power_index = (i/2) & 0x3;
+ if (half_power_index == acc_index) {
+ /* copy is cheaper than weave_to_mpi */
+ MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
+ SQR(&accum2,&accum[acc_index]);
+ } else {
+ SQR(&accum[half_power_index],&accum[acc_index]);
}
- MP_CHECKOK( mpi_to_weave(&accum2, powers+j, nLen, num_powers) );
+ }
}
}
/* if the accum1 isn't set, then either j was out of range, or our logic
@@ -889,7 +1109,7 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase,
if (!smallExp) {
SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 1) {
- SQR(pa1,pa2); MUL(1,pa2,pa1);
+ SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1);
} else {
ABORT;
}
@@ -933,7 +1153,7 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
mp_int montBase, goodBase;
mp_mont_modulus mmm;
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
- static int max_window_bits;
+ static unsigned int max_window_bits;
#endif
/* function for computing n0prime only works if n0 is odd */
@@ -1006,7 +1226,7 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
* the cache line size.
*/
if (!max_window_bits) {
- unsigned long cache_size = mpi_getProcessorLineSize();
+ unsigned long cache_size = s_mpi_getProcessorLineSize();
/* processor has no cache, use 'fast' code always */
if (cache_size == 0) {
mp_using_cache_safe_exp = 0;