1 files changed, 69 insertions, 71 deletions
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c
index 312e0d089..bea8009c2 100644
--- a/security/nss/lib/freebl/mpi/mpmontg.c
+++ b/security/nss/lib/freebl/mpi/mpmontg.c
@@ -541,108 +541,106 @@ mp_err mp_set_safe_modexp(int value)
 
 #ifndef MP_CHAR_STORE_SLOW
 /*
- * mpi_to_weave takes MPI data and stores in into a byte array interleaved.
+ * mpi_to_weave takes an array of bignums, a matrix in which each bignum 
+ * occupies all the columns of a row, and transposes it into a matrix in 
+ * which each bignum occupies a column of every row.  The first row of the
+ * input matrix becomes the first column of the output matrix.  The n'th
+ * row of input becomes the n'th column of output.  The input data is said
+ * to be "interleaved" or "woven" into the output matrix.
  *
- * The purpose of this interleaving is to hide our access to the array of
- * modulus powers from and attacker snooping on cache hits and misses. Because
- * the array is interleaved, each reference will cause exactly the same cache
- * lines to reload.
+ * The array of bignums is left in this woven form.  Each time a single
+ * bignum value is needed, it is recreated by fetching the n'th column, 
+ * forming a single row which is the new bignum.
  *
- * There are 2 different implementations in this file, one which works with just
- * byte loads and stores, the second which works with mp_weave_word loads and
- * stores. These 2 implementations have DIFFERENT results in exactly which byte
- * of an mp_digit winds up in which location in the byte array. That is why
- * there are 2 sets of explanations for how the array is set up.
+ * The purpose of this interleaving is make it impossible to determine which
+ * of the bignums is being used in any one operation by examining the pattern
+ * of cache misses.
  *
+ * The weaving function does not transpose the entire input matrix in one call.
+ * It transposes 4 rows of mp_ints into their respective columns of output.
  *
- *  a is an array of WEAVE_WORD_SIZE mp_ints (that is 4). 
- * It is a partial window into a logical array mp_int p[count] containing
- * the base to the 0 through count-1 powers. Ideally this code would be 
- * simpler if we stored one element of that array at a time, but on some 
- * platforms the cost of storing a byte incurs a full read modify write cycle
- * and increases the memory bandwidth cost by a factor of 4 or 8. By collecting
- * for mp_ints together, we can arrange to store all 4 values in a single
- * word write. 
- *
- *  b is the targeted weaved location. b[0] points to the first byte where
- * first byte of the a array needs to be stored. Each b is an offset into the 
- * weave array.
- * 
- *  count is 2^window size. 
- *
- *  b_size is the size in mp_digits of each mp_int in the array. mp_ints
- * with less than b_size elements are logically padded with zeros before
- * storing.
+ * There are two different implementations of the weaving and unweaving code
+ * in this file.  One uses byte loads and stores.  The second uses loads and
+ * stores of mp_weave_word size values.  The weaved forms of these two 
+ * implementations differ.  Consequently, each one has its own explanation.
  *
+ * Here is the explanation for the byte-at-a-time implementation.
  *
- * Data is stored as follows :
- * The mp_int array is treated as a byte array.
+ * This implementation treats each mp_int bignum as an array of bytes, 
+ * rather than as an array of mp_digits.  It stores those bytes as a 
+ * column of bytes in the output matrix.  It doesn't care if the machine
+ * uses big-endian or little-endian byte ordering within mp_digits.
+ * The first byte of the mp_digit array becomes the first byte in the output
+ * column, regardless of whether that byte is the MSB or LSB of the mp_digit.
  *
+ * "bignums" is an array of mp_ints.
+ * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
  *
- * we want to eventually store the logical array mp_int p[count] into the
- * weave array as follows:
-
- * p[count].digit is treated as a byte array (rather than * an mp_digit array),
- * N is count, and n is b_size * *sizeof(mp_digit):
- *
- * p[0].digit[0]   p[1].digit[0]   ...... p[N-2].digit[0]   p[N-1].digit[0]
- * p[0].digit[1]   p[1].digit[1]   ...... p[N-2].digit[1]   p[N-1].digit[1]
- *            .                                         .
- *            .                                         .
- * p[0].digit[n-2] p[1].digit[n-2] ...... p[N-2].digit[n-2] p[N-1].digit[n-2]
- * p[0].digit[n-1] p[1].digit[n-1] ...... p[N-2].digit[n-1] p[N-1].digit[n-1] 
+ * "weaved" is the weaved output matrix. 
+ * The first byte of bignums[0] is stored in weaved[0].
+ * 
+ * "nBignums" is the total number of bignums in the array of which "bignums" 
+ * is a part.  
  *
- * This function stores that a window of p in each call.
+ * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array. 
+ * mp_ints that use less than nDigits digits are logically padded with zeros 
+ * while being stored in the weaved array.
  */
-mp_err mpi_to_weave(const mp_int *a, unsigned char *b, 
-			             mp_size b_size,  mp_size count)
+mp_err mpi_to_weave(const mp_int  *bignums, 
+                    unsigned char *weaved, 
+		    mp_size nDigits,  /* in each mp_int of input */
+		    mp_size nBignums) /* in the entire source array */
 {
-  mp_size i, j;
-  unsigned char *bsave = b;
+  mp_size i;
+  unsigned char * endDest = weaved + (nDigits * nBignums * sizeof(mp_digit));
 
   for (i=0; i < WEAVE_WORD_SIZE; i++) {
-    unsigned char *pb = (unsigned char *)MP_DIGITS(&a[i]);
-    mp_size useda = MP_USED(&a[i]);
-    mp_size zero =  b_size - useda;
-    unsigned char *end = pb+ (useda*sizeof(mp_digit));
-    b = bsave+i;
-
+    mp_size used = MP_USED(&bignums[i]);
+    unsigned char *pSrc   = (unsigned char *)MP_DIGITS(&bignums[i]);
+    unsigned char *endSrc = pSrc + (used * sizeof(mp_digit));
+    unsigned char *pDest  = weaved + i;
 
-    ARGCHK(MP_SIGN(&a[i]) == MP_ZPOS, MP_BADARG);
-    ARGCHK(useda <= b_size, MP_BADARG);
+    ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);
+    ARGCHK(used <= nDigits, MP_BADARG);
 
-    for (; pb < end; pb++) {
-      *b = *pb;
-      b += count;
+    for (; pSrc < endSrc; pSrc++) {
+      *pDest = *pSrc;
+      pDest += nBignums;
     }
-    for (j=0; j < zero; j++) {
-      *b = 0;
-      b += count;
+    while (pDest < endDest) {
+      *pDest = 0;
+      pDest += nBignums;
     }
   }
 
   return MP_OKAY;
 }
 
-/* reverse the operation above for one entry.
- * b points to the offset into the weave array of the power we are
- * calculating */
-mp_err weave_to_mpi(mp_int *a, const unsigned char *b, 
-					mp_size b_size, mp_size count)
+/* Reverse the operation above for one mp_int.
+ * Reconstruct one mp_int from its column in the weaved array.
+ * "pSrc" points to the offset into the weave array of the bignum we 
+ * are going to reconstruct.
+ */
+mp_err weave_to_mpi(mp_int *a,                /* output, result */
+                    const unsigned char *pSrc, /* input, byte matrix */
+		    mp_size nDigits,          /* per mp_int output */
+		    mp_size nBignums)         /* bignums in weaved matrix */
 {
-  unsigned char  *pb = (unsigned char *)MP_DIGITS(a);
-  unsigned char *end = pb+ (b_size*sizeof(mp_digit));
+  unsigned char *pDest   = (unsigned char *)MP_DIGITS(a);
+  unsigned char *endDest = pDest + (nDigits * sizeof(mp_digit));
 
   MP_SIGN(a) = MP_ZPOS;
-  MP_USED(a) = b_size;
+  MP_USED(a) = nDigits;
 
-  for (; pb < end; b+=count, pb++) {
-    *pb = *b;
+  for (; pDest < endDest; pSrc += nBignums, pDest++) {
+    *pDest = *pSrc;
   }
   s_mp_clamp(a);
   return MP_OKAY;
 }
+
 #else
+
 /* Need a primitive that we know is 32 bits long... */
 /* this is true on all modern processors we know of today*/
 typedef unsigned int mp_weave_word;