diff options
author | relyea%netscape.com <devnull@localhost> | 2005-11-11 19:53:26 +0000 |
---|---|---|
committer | relyea%netscape.com <devnull@localhost> | 2005-11-11 19:53:26 +0000 |
commit | 885fa706d4fc6806355c184035d4c3553b57a5e2 (patch) | |
tree | 95f25a28a5d924c7ad4ee9b8f0995f637ebb8d20 | |
parent | 146060d17ac1ee1fabef61661ce40bb4f869f694 (diff) | |
download | nss-hg-885fa706d4fc6806355c184035d4c3553b57a5e2.tar.gz |
commit previously reviewed version of mpmontg.c to the TMP branch as a baseline
for the 'final' version.
-rw-r--r-- | security/nss/lib/freebl/mpi/mpmontg.c | 480 |
1 files changed, 350 insertions, 130 deletions
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c index cc4d233fd..13a95b172 100644 --- a/security/nss/lib/freebl/mpi/mpmontg.c +++ b/security/nss/lib/freebl/mpi/mpmontg.c @@ -50,6 +50,7 @@ /* #define MP_USING_MONT_MULF 1 */ #define MP_USING_CACHE_SAFE_MOD_EXP 1 #define MP_USING_WEAVE_COPY 1 +#define MP_CHAR_STORE_SLOW 1 #include <string.h> #include "mpi-priv.h" #include "mp_gf2m-priv.h" @@ -58,10 +59,15 @@ #ifdef MP_USING_MONT_MULF #include "montmulf.h" #endif -#include "prtypes.h" +#include <stddef.h> /* ptrdiff_t */ -#include <fcntl.h> -#include <unistd.h> +/* need to know endianness of this platform. If we aren't told, get it from + * nspr... */ +#ifdef MP_CHAR_STORE_SLOW +#if !defined(IS_BIG_ENDIAN) && !defined(IS_LITTLE_ENDIAN) +#include "prcpucfg.h" +#endif +#endif #define STATIC /* #define DEBUG 1 */ @@ -70,8 +76,8 @@ #define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */ #define MAX_POWERS MAX_ODD_INTS*2 #define MAX_MODULUS_BITS 8192 -#define MAX_MODULUS_LENGTH (MAX_MODULUS_BITS/8) -#define MAX_MODULUS_DIGITS (MAX_MODULUS_LENGTH/sizeof(mp_digit)) +#define MAX_MODULUS_BYTES (MAX_MODULUS_BITS/8) +#define MAX_MODULUS_DIGITS (MAX_MODULUS_BYTES/sizeof(mp_digit)) #if defined(_WIN32_WCE) #define ABORT res = MP_UNDEF; goto CLEANUP @@ -523,77 +529,97 @@ CLEANUP: #undef MUL #ifdef MP_USING_CACHE_SAFE_MOD_EXP - unsigned int mp_using_cache_safe_exp = 1; +#endif -void mp_set_mode_modify() { mp_using_cache_safe_exp = 0; } -void mp_set_mode_safe() { mp_using_cache_safe_exp = 1; } +mp_err mp_set_modexp_mode(int value) +{ +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + mp_using_cache_safe_exp = value; + return MP_OKAY; +#else + if (value == 0) { + return MP_OKAY; + } + return MP_BADARG; +#endif +} + + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP #ifndef MP_USING_WEAVE_COPY -#if MP_DIGIT_BITS == 32 +#ifndef MP_CHAR_STORE_SLOW +#define WEAVE_BASE_INIT \ + unsigned char *_ptr; + +#define WEAVE_FIRST(bi,b,count) \ + _ptr = (unsigned char *)bi; \ + *_ptr++ = 
*b; b+= count; + +#define WEAVE_MIDDLE(bi,b,count) \ + *_ptr++ = *b; b+= count; + +#define WEAVE_LAST(bi,b,count) \ + *_ptr++ = *b; b+= count; + +#else +#define WEAVE_BASE_INIT \ + register mp_digit _digit; + +#define WEAVE_FIRST(bi,b,count) \ + _digit = *b << 8; b += count; + +#define WEAVE_MIDDLE(bi,b,count) \ + _digit |= *b; b += count; _digit = _digit << 8; + +#define WEAVE_LAST(bi,b,count) \ + _digit |= *b; b += count; \ + *bi = _digit; +#endif /* MP_CHAR_STORE_SLOW */ + +#if MP_DIGIT_BITS == 32 #define WEAVE_INIT \ - unsigned char *_ptr; + WEAVE_BASE_INIT #define WEAVE_FETCH(bi, b, count) \ - _ptr = (unsigned char *)bi; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; - -#define WEAVE_PUT(bi, b, count) \ - _ptr = (unsigned char *)bi; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; + WEAVE_FIRST(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_LAST(bi,b,count) + #else -#if MP_DIGIT_BITS == 64 +#ifdef MP_DIGIT_BITS == 64 + #define WEAVE_INIT \ - unsigned char *_ptr + WEAVE_BASE_INIT #define WEAVE_FETCH(bi, b, count) \ - _ptr = (unsigned char *)bi; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; \ - *_ptr++ = *b; b+= count; - -#define WEAVE_PUT(bi, b, count) \ - _ptr = (unsigned char *)bi; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; \ - *b = *_ptr++; b+= count; + WEAVE_FIRST(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_MIDDLE(bi,b,count) \ + WEAVE_LAST(bi,b,count) + #else 
#define WEAVE_INIT \ - int _i; \ - unsigned char *_ptr; + int _i; \ + WEAVE_BASE_INIT - /* It would be nice to unroll this loop as well */ + /* It would be nice to unroll this loop as well */ #define WEAVE_FETCH(bi, b, count) \ - _ptr = (unsigned char *)bi; \ - for (_i=0; _i < sizeof mp_digit ; _i++) { \ - *_ptr++ = *b; \ - b+=count; \ - } - -#define WEAVE_PUT(bi, b, count) \ - _ptr = (unsigned char *)bi; \ - for (_i=0; _i < sizeof mp_digit ; _i++) { \ - *b = *_ptr++; \ - b+=count; \ - } -#endif + WEAVE_FIRST(bi,b,count) \ + WEAVE_LAST(bi,b,count) \ + for (_i=1; _i < sizeof(mp_digit) -1 ; _i++) { \ + WEAVE_MIDDLE(bi,b,count) \ + } \ + WEAVE_LAST(bi,b,count) + +#endif #endif #if !defined(MP_MONT_USE_MP_MUL) @@ -702,64 +728,241 @@ CLEANUP: } /* end mp_mul() */ #endif /* MP_USING_WEAVE_COPY */ +#define WEAVE_WORD_SIZE 4 + +#ifndef MP_CHAR_STORE_SLOW +mp_err mpi_to_weave(const mp_int *a, unsigned char *b, + mp_size b_size, mp_size count) +{ + mp_size i, j; + unsigned char *bsave = b; + + for (i=0; i < WEAVE_WORD_SIZE; i++) { + unsigned char *pb = (unsigned char *)MP_DIGITS(&a[i]); + mp_size useda = MP_USED(&a[i]); + mp_size zero = b_size - useda; + unsigned char *end = pb+ (useda*sizeof(mp_digit)); + b = bsave+i; + + + ARGCHK(MP_SIGN(&a[i]) == 0, MP_BADARG); + ARGCHK(useda <= b_size, MP_BADARG); + + for (; pb < end; pb++) { + *b = *pb; + b += count; + } + for (j=0; j < zero; j++) { + *b = 0; + b += count; + } + } + + return MP_OKAY; +} +#else +/* Need a primitive that we know is 32 bits long... */ +#if UINT_MAX == MP_32BIT_MAX +typedef unsigned int mp_weave_word; +#else +#if ULONG_MAX == MP_32BIT_MAX +typedef unsigned long mp_weave_word; +#else +#error "Can't find 32 bit primitive type for this platform" +#endif +#endif + +/* + * on some platforms character stores into memory is very expensive since they + * generate a read/modify/write operation on the bus. On those platforms + * we need to do integer writes to the bus. 
+ * + * The weave_to_mpi function in those cases expect the data to be laid out in + * big endian, interleaved. + * + * since we need to interleave on a byte by byte basis, we need to collect + * several mpi structures together into a single uint32 before we write. We + * also need to make sure the uint32 is arranged so that the first value of + * the first array winds up in b[0]. This means construction of that uint32 + * is endian specific (even though the layout of the array is always big + * endian. + */ mp_err mpi_to_weave(const mp_int *a, unsigned char *b, mp_size b_size, mp_size count) { mp_size i; - unsigned char *pb = (unsigned char *)MP_DIGITS(a); - mp_size useda = MP_USED(a); - mp_size zero = b_size - useda; - unsigned char *end = pb+ (useda*sizeof(mp_digit)); + mp_digit *digitsa0; + mp_digit *digitsa1; + mp_digit *digitsa2; + mp_digit *digitsa3; + mp_size useda0; + mp_size useda1; + mp_size useda2; + mp_size useda3; + mp_weave_word *weaved = (mp_weave_word *)b; +#if MP_DIGIT_BITS != 32 && MP_DIGIT_BITS != 64 + mp_size j; +#endif - ARGCHK(MP_SIGN(a) == 0, MP_BADARG); - ARGCHK(useda <= b_size, MP_BADARG); + count = count/sizeof(mp_weave_word); + + /* this code pretty much depends on this ! */ + /*assert(WEAVE_WORD_SIZE == 4); */ + + digitsa0 = MP_DIGITS(&a[0]); + digitsa1 = MP_DIGITS(&a[1]); + digitsa2 = MP_DIGITS(&a[2]); + digitsa3 = MP_DIGITS(&a[3]); + useda0 = MP_USED(&a[0]); + useda1 = MP_USED(&a[1]); + useda2 = MP_USED(&a[2]); + useda3 = MP_USED(&a[3]); + + ARGCHK(MP_SIGN(&a[0]) == 0, MP_BADARG); + ARGCHK(MP_SIGN(&a[1]) == 0, MP_BADARG); + ARGCHK(MP_SIGN(&a[2]) == 0, MP_BADARG); + ARGCHK(MP_SIGN(&a[3]) == 0, MP_BADARG); + ARGCHK(useda0 <= b_size, MP_BADARG); + ARGCHK(useda1 <= b_size, MP_BADARG); + ARGCHK(useda2 <= b_size, MP_BADARG); + ARGCHK(useda3 <= b_size, MP_BADARG); + +#define SAFE_FETCH(digit, used, word) ((i) < (used) ? 
(digit[i]) : 0) + + for (i=0; i < b_size; i++) { + mp_digit d0 = SAFE_FETCH(digitsa0,useda0,i); + mp_digit d1 = SAFE_FETCH(digitsa1,useda1,i); + mp_digit d2 = SAFE_FETCH(digitsa2,useda2,i); + mp_digit d3 = SAFE_FETCH(digitsa3,useda3,i); + register mp_weave_word acc; - for (; pb < end; pb++) { - *b = *pb; - b += count; - } - for (i=0; i < zero; i++) { - *b = 0; - b += count; +/* + * ONE_STEP takes to MSB of each of our current digits and places that + * byte in the appropriate position for writing to the weaved array. + * On little endian: + * b3 b2 b1 b0 + * On big endian: + * b0 b1 b2 b3 + * When the data is written it would always wind up: + * b[0] = b0 + * b[1] = b1 + * b[2] = b2 + * b[3] = b3 + * + * Once weave written the MSB, we shift the whole digit up left one + * byte, putting the Next Most Significant Byte in the MSB position, + * so we we repeat the next one step that byte will be written. + */ +#ifdef IS_LITTLE_ENDIAN +#define MPI_WEAVE_ONE_STEP \ + acc = (d0 >> (MP_DIGIT_BITS-8)) & 0xff ; d0 <<= 8; /*b0*/ \ + acc |= (d1 >> (MP_DIGIT_BITS-16)) & 0xff00 ; d1 <<= 8; /*b1*/ \ + acc |= (d2 >> (MP_DIGIT_BITS-24)) & 0xff0000 ; d2 <<= 8; /*b2*/ \ + acc |= (d3 >> (MP_DIGIT_BITS-32)) & 0xff000000; d3 <<= 8; /*b3*/ \ + *weaved = acc; weaved += count; +#else +#error "Intel is Little endian, but IS_LITTLE_ENDIAN is not defined!" 
+#define MPI_WEAVE_ONE_STEP \ + acc = (d0 >> (MP_DIGIT_BITS-32)) & 0xff000000; d0 <<= 8; /*b0*/ \ + acc |= (d1 >> (MP_DIGIT_BITS-24)) & 0xff0000 ; d1 <<= 8; /*b1*/ \ + acc |= (d2 >> (MP_DIGIT_BITS-16)) & 0xff00 ; d2 <<= 8; /*b2*/ \ + acc |= (d3 >> (MP_DIGIT_BITS-8)) & 0xff ; d3 <<= 8; /*b3*/ \ + *weaved = acc; weaved += count; +#endif + +#if MP_DIGIT_BITS == 32 || MP_DIGIT_BITS == 64 + MPI_WEAVE_ONE_STEP + MPI_WEAVE_ONE_STEP + MPI_WEAVE_ONE_STEP + MPI_WEAVE_ONE_STEP +#if MP_DIGIT_BITS == 64 + MPI_WEAVE_ONE_STEP + MPI_WEAVE_ONE_STEP + MPI_WEAVE_ONE_STEP + MPI_WEAVE_ONE_STEP +#endif +#else + for (j=0; j < sizeof (mp_digit); j++) { + MPI_WEAVE_ONE_STEP + } +#endif } return MP_OKAY; } +#endif #ifdef MP_USING_WEAVE_COPY +#ifndef MP_CHAR_STORE_SLOW mp_err weave_to_mpi(mp_int *a, const unsigned char *b, mp_size b_size, mp_size count) { - unsigned char *pb = (unsigned char *)MP_DIGITS(a); + unsigned char *pb = (unsigned char *)MP_DIGITS(a); unsigned char *end = pb+ (b_size*sizeof(mp_digit)); MP_SIGN(a) = 0; MP_USED(a) = b_size; - for (; pb < end; pb++) { + for (; pb < end; b+=count, pb++) { *pb = *b; - b += count; } return MP_OKAY; } +#else +mp_err weave_to_mpi(mp_int *a, const unsigned char *b, + mp_size b_size, mp_size count) +{ + mp_digit *pb = MP_DIGITS(a); + mp_digit *end = &pb[b_size]; + + MP_SIGN(a) = 0; + MP_USED(a) = b_size; + + for (; pb < end; pb++) { + register mp_digit digit; + + digit = *b << 8; b += count; +#if MP_DIGIT_BITS == 32 || MP_DIGIT_BITS == 64 + digit |= *b; b += count; digit = digit << 8; + digit |= *b; b += count; digit = digit << 8; +#if MP_DIGIT_BITS == 64 + digit |= *b; b += count; digit = digit << 8; + digit |= *b; b += count; digit = digit << 8; + digit |= *b; b += count; digit = digit << 8; + digit |= *b; b += count; digit = digit << 8; +#endif +#else + for (i=1; i < sizeof(mp_digit)-1; i++) { + digit |= *b; b += count; digit = digit << 8; + } #endif + digit |= *b; b += count; + + *pb = digit; + } + return MP_OKAY; +} +#endif +#endif /* 
MP_USING_WEAVE_COPY */ #define SQR(a,b) \ MP_CHECKOK( mp_sqr(a, b) );\ MP_CHECKOK( s_mp_redc(b, mmm) ); -#ifdef MP_USING_WEAVE_COPY #if defined(MP_MONT_USE_MP_MUL) -#define MUL(x,a,b) \ - MP_CHECKOK( weave_to_mpi(&tmp, powers + (x), nLen, num_powers) ); \ - MP_CHECKOK( mp_mul_weave(a, &tmp, b) ); \ +#define MUL_NOWEAVE(x,a,b) \ + MP_CHECKOK( mp_mul(a, x, b) ); \ MP_CHECKOK( s_mp_redc(b, mmm) ) ; #else +#define MUL_NOWEAVE(x,a,b) \ + MP_CHECKOK( s_mp_mul_mont(a, x, b, mmm) ); +#endif + +#ifdef MP_USING_WEAVE_COPY #define MUL(x,a,b) \ MP_CHECKOK( weave_to_mpi(&tmp, powers + (x), nLen, num_powers) ); \ - MP_CHECKOK( s_mp_mul_mont(a, &tmp, b, mmm) ); -#endif + MUL_NOWEAVE(&tmp,a,b) #else #if defined(MP_MONT_USE_MP_MUL) #define MUL(x,a,b) \ @@ -769,7 +972,7 @@ mp_err weave_to_mpi(mp_int *a, const unsigned char *b, #define MUL(x,a,b) \ MP_CHECKOK( s_mp_mul_mont_weave(a, powers + (x), nLen, num_powers, b, mmm) ); #endif -#endif +#endif /* MP_USING_WEAVE_COPY */ #define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp #define MP_ALIGN(x,y) ((((ptrdiff_t)(x))+((y)-1))&(~((y)-1))) @@ -786,15 +989,15 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase, mp_size num_powers) { mp_int *pa1, *pa2, *ptmp; - mp_size i, j; + mp_size i; mp_size first_window; mp_err res; int expOff; - mp_int accum1, accum2; + mp_int accum1, accum2, accum[WEAVE_WORD_SIZE]; #ifdef MP_USING_WEAVE_COPY mp_int tmp; #endif - unsigned char powersArray[MAX_POWERS * (MAX_MODULUS_LENGTH+1)]; + unsigned char powersArray[MAX_POWERS * (MAX_MODULUS_BYTES+1)]; unsigned char *powers; ARGCHK( nLen <= MAX_MODULUS_DIGITS , MP_BADARG); @@ -804,6 +1007,10 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase, MP_DIGITS(&accum1) = 0; MP_DIGITS(&accum2) = 0; + MP_DIGITS(&accum[0]) = 0; + MP_DIGITS(&accum[1]) = 0; + MP_DIGITS(&accum[2]) = 0; + MP_DIGITS(&accum[3]) = 0; /* grab the first window value. 
This allows us to preload accumulator1 * and save a conversion, some squares and a multiple*/ @@ -811,62 +1018,75 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase, bits_in_exponent-window_bits, window_bits) ); first_window = (mp_size)res; - MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) ); + MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) ); + MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) ); + MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) ); + MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) ); MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) ); + MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) ); #ifdef MP_USING_WEAVE_COPY MP_DIGITS(&tmp) = 0; MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) ); #endif - mp_set(&accum2, 1); - MP_CHECKOK( s_mp_to_mont(&accum2, mmm, &accum2) ); - /* unlike mp_copy_init, mp_copy is from, to */ - /* can this be an assert? If we are clamped, we shouldn't ever have a case - * where the first window is '0' */ - if (first_window == 0) { - MP_CHECKOK( mp_copy(&accum2, &accum1) ); - } - MP_CHECKOK( mpi_to_weave(&accum2, powers, nLen, num_powers) ); - - MP_CHECKOK( mp_copy(montBase, &accum2) ); - if (first_window == 1) { - MP_CHECKOK( mp_copy(&accum2, &accum1) ); + /* build the first 4 powers inline */ + if (num_powers > 2) { + mp_set(&accum[0], 1); + MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) ); + MP_CHECKOK( mp_copy(montBase, &accum[1]) ); + SQR(montBase, &accum[2]); + MUL_NOWEAVE(montBase, &accum[2], &accum[3]); + MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) ); + if (first_window < 4) { + MP_CHECKOK( mp_copy(&accum[first_window], &accum1) ); + first_window = num_powers; + } + } else { + /* assert first_window == 1? */ + MP_CHECKOK( mp_copy(montBase, &accum1) ); } - MP_CHECKOK( mpi_to_weave(&accum2, powers+1, nLen, num_powers) ); /* this adds 2**(k-1)-2 square operations over just calculating the * odd powers where k is the window size. 
We will get some of that * back by not needing the first 'N' squares for the window (though * squaring 1 is extremely fast, so it's not much savings) */ - - /* This loop is like this so we can calculate all the powers with only 1 - * temp variable. This saves us from needing a weaved square routine. - */ - for (i = 2; i < num_powers; i++) { - if (i == 2 ) { - MP_CHECKOK( mp_sqr(&accum2, &accum2) ); - MP_CHECKOK( s_mp_redc(&accum2, mmm) ); - if (first_window == i) { - MP_CHECKOK( mp_copy(&accum2, &accum1) ); - } - MP_CHECKOK( mpi_to_weave(&accum2, powers+i, nLen, num_powers) ); - } else if ( i & 1 ) { - MUL(i-1, montBase, &accum2); - if (first_window == i) { - MP_CHECKOK( mp_copy(&accum2, &accum1) ); - } - MP_CHECKOK( mpi_to_weave(&accum2, powers+i, nLen, num_powers) ); + for (i = 4; i < num_powers; i++) { + int acc_index = i & 0x3; /* i % 4 */ + if ( i & 1 ) { + MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]); + /* we've filled the array do our 'per array' processing */ + if (acc_index == 3) { + MP_CHECKOK( mpi_to_weave(accum, powers + i - 3, nLen, num_powers) ); + + if (first_window <= i) { + MP_CHECKOK( mp_copy(&accum[first_window & 0x3], &accum1) ); + first_window = num_powers; + } + } } else { - continue; - } - for (j=i*2; j < num_powers; j *= 2) { - MP_CHECKOK( mp_sqr(&accum2, &accum2) ); - MP_CHECKOK( s_mp_redc(&accum2, mmm) ); - if (first_window == j) { - MP_CHECKOK( mp_copy(&accum2, &accum1) ); + /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source + * and target are the same so we need to copy.. 
After that, the + * value is overwritten, so we need to fetch it from the stored + * weave array */ + if (i > 8) { +#ifdef MP_USING_WEAVE_COPY + MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers)); + SQR(&accum2, &accum[acc_index]); +#else + int prev_index = (acc_index - 1) & 0x3; + MUL_NOWEAVE(montBase, &accum[prev_index] , &accum[acc_index]); +#endif + } else { + int half_power_index = (i/2) & 0x3; + if (half_power_index == acc_index) { + /* copy is cheaper than weave_to_mpi */ + MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2)); + SQR(&accum2,&accum[acc_index]); + } else { + SQR(&accum[half_power_index],&accum[acc_index]); } - MP_CHECKOK( mpi_to_weave(&accum2, powers+j, nLen, num_powers) ); + } } } /* if the accum1 isn't set, then either j was out of range, or our logic @@ -889,7 +1109,7 @@ mp_err mp_exptmod_safe_i(const mp_int * montBase, if (!smallExp) { SQR(pa1,pa2); SWAPPA; } else if (smallExp & 1) { - SQR(pa1,pa2); MUL(1,pa2,pa1); + SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1); } else { ABORT; } @@ -933,7 +1153,7 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent, mp_int montBase, goodBase; mp_mont_modulus mmm; #ifdef MP_USING_CACHE_SAFE_MOD_EXP - static int max_window_bits; + static unsigned int max_window_bits; #endif /* function for computing n0prime only works if n0 is odd */ @@ -1006,7 +1226,7 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent, * the cache line size. */ if (!max_window_bits) { - unsigned long cache_size = mpi_getProcessorLineSize(); + unsigned long cache_size = s_mpi_getProcessorLineSize(); /* processor has no cache, use 'fast' code always */ if (cache_size == 0) { mp_using_cache_safe_exp = 0; |