1 files changed, 254 insertions, 0 deletions
diff --git a/libtommath/bn_s_mp_exptmod_fast.c b/libtommath/bn_s_mp_exptmod_fast.c
new file mode 100644
index 0000000..682ded8
--- /dev/null
+++ b/libtommath/bn_s_mp_exptmod_fast.c
@@ -0,0 +1,254 @@
+#include "tommath_private.h"
+#ifdef BN_S_MP_EXPTMOD_FAST_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
+ *
+ * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
+ * The value of k changes based on the size of the exponent.
+ *
+ * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
+ */
+
+#ifdef MP_LOW_MEM
+#   define TAB_SIZE 32
+#   define MAX_WINSIZE 5
+#else
+#   define TAB_SIZE 256
+#   define MAX_WINSIZE 0
+#endif
+
+mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
+{
+   mp_int  M[TAB_SIZE], res;
+   mp_digit buf, mp;
+   int     bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+   mp_err   err;
+
+   /* use a pointer to the reduction algorithm.  This allows us to use
+    * one of many reduction algorithms without modding the guts of
+    * the code with if statements everywhere.
+    */
+   mp_err(*redux)(mp_int *x, const mp_int *n, mp_digit rho);
+
+   /* find window size */
+   x = mp_count_bits(X);
+   if (x <= 7) {
+      winsize = 2;
+   } else if (x <= 36) {
+      winsize = 3;
+   } else if (x <= 140) {
+      winsize = 4;
+   } else if (x <= 450) {
+      winsize = 5;
+   } else if (x <= 1303) {
+      winsize = 6;
+   } else if (x <= 3529) {
+      winsize = 7;
+   } else {
+      winsize = 8;
+   }
+
+   winsize = MAX_WINSIZE ? MP_MIN(MAX_WINSIZE, winsize) : winsize;
+
+   /* init M array */
+   /* init first cell */
+   if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
+      return err;
+   }
+
+   /* now init the second half of the array */
+   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
+      if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
+         for (y = 1<<(winsize-1); y < x; y++) {
+            mp_clear(&M[y]);
+         }
+         mp_clear(&M[1]);
+         return err;
+      }
+   }
+
+   /* determine and setup reduction code */
+   if (redmode == 0) {
+      if (MP_HAS(MP_MONTGOMERY_SETUP)) {
+         /* now setup montgomery  */
+         if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY)      goto LBL_M;
+      } else {
+         err = MP_VAL;
+         goto LBL_M;
+      }
+
+      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
+      if (MP_HAS(S_MP_MONTGOMERY_REDUCE_FAST) &&
+          (((P->used * 2) + 1) < MP_WARRAY) &&
+          (P->used < MP_MAXFAST)) {
+         redux = s_mp_montgomery_reduce_fast;
+      } else if (MP_HAS(MP_MONTGOMERY_REDUCE)) {
+         /* use slower baseline Montgomery method */
+         redux = mp_montgomery_reduce;
+      } else {
+         err = MP_VAL;
+         goto LBL_M;
+      }
+   } else if (redmode == 1) {
+      if (MP_HAS(MP_DR_SETUP) && MP_HAS(MP_DR_REDUCE)) {
+         /* setup DR reduction for moduli of the form B**k - b */
+         mp_dr_setup(P, &mp);
+         redux = mp_dr_reduce;
+      } else {
+         err = MP_VAL;
+         goto LBL_M;
+      }
+   } else if (MP_HAS(MP_REDUCE_2K_SETUP) && MP_HAS(MP_REDUCE_2K)) {
+      /* setup DR reduction for moduli of the form 2**k - b */
+      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY)          goto LBL_M;
+      redux = mp_reduce_2k;
+   } else {
+      err = MP_VAL;
+      goto LBL_M;
+   }
+
+   /* setup result */
+   if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY)           goto LBL_M;
+
+   /* create M table
+    *
+
+    *
+    * The first half of the table is not computed though accept for M[0] and M[1]
+    */
+
+   if (redmode == 0) {
+      if (MP_HAS(MP_MONTGOMERY_CALC_NORMALIZATION)) {
+         /* now we need R mod m */
+         if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) goto LBL_RES;
+
+         /* now set M[1] to G * R mod m */
+         if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY)     goto LBL_RES;
+      } else {
+         err = MP_VAL;
+         goto LBL_RES;
+      }
+   } else {
+      mp_set(&res, 1uL);
+      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY)                 goto LBL_RES;
+   }
+
+   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
+   if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;
+
+   for (x = 0; x < (winsize - 1); x++) {
+      if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;
+      if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) goto LBL_RES;
+   }
+
+   /* create upper table */
+   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
+      if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY)     goto LBL_RES;
+      if ((err = redux(&M[x], P, mp)) != MP_OKAY)                 goto LBL_RES;
+   }
+
+   /* set initial mode and bit cnt */
+   mode   = 0;
+   bitcnt = 1;
+   buf    = 0;
+   digidx = X->used - 1;
+   bitcpy = 0;
+   bitbuf = 0;
+
+   for (;;) {
+      /* grab next digit as required */
+      if (--bitcnt == 0) {
+         /* if digidx == -1 we are out of digits so break */
+         if (digidx == -1) {
+            break;
+         }
+         /* read next digit and reset bitcnt */
+         buf    = X->dp[digidx--];
+         bitcnt = (int)MP_DIGIT_BIT;
+      }
+
+      /* grab the next msb from the exponent */
+      y     = (mp_digit)(buf >> (MP_DIGIT_BIT - 1)) & 1uL;
+      buf <<= (mp_digit)1;
+
+      /* if the bit is zero and mode == 0 then we ignore it
+       * These represent the leading zero bits before the first 1 bit
+       * in the exponent.  Technically this opt is not required but it
+       * does lower the # of trivial squaring/reductions used
+       */
+      if ((mode == 0) && (y == 0)) {
+         continue;
+      }
+
+      /* if the bit is zero and mode == 1 then we square */
+      if ((mode == 1) && (y == 0)) {
+         if ((err = mp_sqr(&res, &res)) != MP_OKAY)               goto LBL_RES;
+         if ((err = redux(&res, P, mp)) != MP_OKAY)               goto LBL_RES;
+         continue;
+      }
+
+      /* else we add it to the window */
+      bitbuf |= (y << (winsize - ++bitcpy));
+      mode    = 2;
+
+      if (bitcpy == winsize) {
+         /* ok window is filled so square as required and multiply  */
+         /* square first */
+         for (x = 0; x < winsize; x++) {
+            if ((err = mp_sqr(&res, &res)) != MP_OKAY)            goto LBL_RES;
+            if ((err = redux(&res, P, mp)) != MP_OKAY)            goto LBL_RES;
+         }
+
+         /* then multiply */
+         if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY)   goto LBL_RES;
+         if ((err = redux(&res, P, mp)) != MP_OKAY)               goto LBL_RES;
+
+         /* empty window and reset */
+         bitcpy = 0;
+         bitbuf = 0;
+         mode   = 1;
+      }
+   }
+
+   /* if bits remain then square/multiply */
+   if ((mode == 2) && (bitcpy > 0)) {
+      /* square then multiply if the bit is set */
+      for (x = 0; x < bitcpy; x++) {
+         if ((err = mp_sqr(&res, &res)) != MP_OKAY)               goto LBL_RES;
+         if ((err = redux(&res, P, mp)) != MP_OKAY)               goto LBL_RES;
+
+         /* get next bit of the window */
+         bitbuf <<= 1;
+         if ((bitbuf & (1 << winsize)) != 0) {
+            /* then multiply */
+            if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY)     goto LBL_RES;
+            if ((err = redux(&res, P, mp)) != MP_OKAY)            goto LBL_RES;
+         }
+      }
+   }
+
+   if (redmode == 0) {
+      /* fixup result if Montgomery reduction is used
+       * recall that any value in a Montgomery system is
+       * actually multiplied by R mod n.  So we have
+       * to reduce one more time to cancel out the factor
+       * of R.
+       */
+      if ((err = redux(&res, P, mp)) != MP_OKAY)                  goto LBL_RES;
+   }
+
+   /* swap res with Y */
+   mp_exch(&res, Y);
+   err = MP_OKAY;
+LBL_RES:
+   mp_clear(&res);
+LBL_M:
+   mp_clear(&M[1]);
+   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
+      mp_clear(&M[x]);
+   }
+   return err;
+}
+#endif