1 files changed, 1131 insertions, 333 deletions
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c
index b3ce4203e..56d684b46 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c
@@ -1,8 +1,8 @@
 /* integer.c
  *
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
  *
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
  *
  * wolfSSL is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
 
+
 /*
  * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
  * http://math.libtomcrypt.com
@@ -33,19 +34,70 @@
 /* in case user set USE_FAST_MATH there */
 #include <wolfssl/wolfcrypt/settings.h>
 
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
 #ifndef NO_BIG_INT
 
 #ifndef USE_FAST_MATH
 
+#ifndef WOLFSSL_SP_MATH
+
 #include <wolfssl/wolfcrypt/integer.h>
 
-#ifndef NO_WOLFSSL_SMALL_STACK
-    #ifndef WOLFSSL_SMALL_STACK
-        #define WOLFSSL_SMALL_STACK
+#if defined(FREESCALE_LTC_TFM)
+    #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+#ifdef WOLFSSL_DEBUG_MATH
+    #include <stdio.h>
+#endif
+
+#ifdef SHOW_GEN
+    #ifndef NO_STDIO_FILESYSTEM
+        #include <stdio.h>
     #endif
 #endif
 
-static void bn_reverse (unsigned char *s, int len);
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef __cplusplus
+    extern "C" {
+#endif
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+#endif
+
+/* reverse an array, used for radix code */
+static void
+bn_reverse (unsigned char *s, int len)
+{
+    int     ix, iy;
+    unsigned char t;
+
+    ix = 0;
+    iy = len - 1;
+    while (ix < iy) {
+        t     = s[ix];
+        s[ix] = s[iy];
+        s[iy] = t;
+        ++ix;
+        --iy;
+    }
+}
 
 /* math settings check */
 word32 CheckRunTimeSettings(void)
@@ -60,6 +112,13 @@ int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
 {
     int res = MP_OKAY;
 
+    if (a) XMEMSET(a, 0, sizeof(mp_int));
+    if (b) XMEMSET(b, 0, sizeof(mp_int));
+    if (c) XMEMSET(c, 0, sizeof(mp_int));
+    if (d) XMEMSET(d, 0, sizeof(mp_int));
+    if (e) XMEMSET(e, 0, sizeof(mp_int));
+    if (f) XMEMSET(f, 0, sizeof(mp_int));
+
     if (a && ((res = mp_init(a)) != MP_OKAY))
         return res;
 
@@ -95,33 +154,28 @@ int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
 /* init a new mp_int */
 int mp_init (mp_int * a)
 {
-  int i;
+  /* Safeguard against passing in a null pointer */
+  if (a == NULL)
+    return MP_VAL;
 
-  /* allocate memory required and clear it */
-  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
-                                      DYNAMIC_TYPE_BIGINT);
-  if (a->dp == NULL) {
-    return MP_MEM;
-  }
-
-  /* set the digits to zero */
-  for (i = 0; i < MP_PREC; i++) {
-      a->dp[i] = 0;
-  }
+  /* defer allocation until mp_grow */
+  a->dp = NULL;
 
   /* set the used to zero, allocated digits to the default precision
    * and sign to positive */
   a->used  = 0;
-  a->alloc = MP_PREC;
+  a->alloc = 0;
   a->sign  = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_init(&a->raw);
+#endif
 
   return MP_OKAY;
 }
 
 
 /* clear one (frees)  */
-void
-mp_clear (mp_int * a)
+void mp_clear (mp_int * a)
 {
   int i;
 
@@ -136,15 +190,52 @@ mp_clear (mp_int * a)
     }
 
     /* free ram */
-    XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
+    mp_free(a);
 
     /* reset members to make debugging easier */
-    a->dp    = NULL;
     a->alloc = a->used = 0;
     a->sign  = MP_ZPOS;
   }
 }
 
+void mp_free (mp_int * a)
+{
+  /* only do anything if a hasn't been freed previously */
+  if (a->dp != NULL) {
+    /* free ram */
+    XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
+    a->dp    = NULL;
+  }
+
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_free(&a->raw);
+#endif
+}
+
+void mp_forcezero(mp_int * a)
+{
+    if (a == NULL)
+        return;
+
+    /* only do anything if a hasn't been freed previously */
+    if (a->dp != NULL) {
+      /* force zero the used digits */
+      ForceZero(a->dp, a->used * sizeof(mp_digit));
+#ifdef HAVE_WOLF_BIGINT
+      wc_bigint_zero(&a->raw);
+#endif
+      /* free ram */
+      mp_free(a);
+
+      /* reset members to make debugging easier */
+      a->alloc = a->used = 0;
+      a->sign  = MP_ZPOS;
+    }
+
+    a->sign = MP_ZPOS;
+    a->used = 0;
+}
+
 
 /* get the size for an unsigned equivalent */
 int mp_unsigned_bin_size (mp_int * a)
@@ -155,8 +246,7 @@ int mp_unsigned_bin_size (mp_int * a)
 
 
 /* returns the number of bits in an int */
-int
-mp_count_bits (mp_int * a)
+int mp_count_bits (mp_int * a)
 {
   int     r;
   mp_digit q;
@@ -187,11 +277,11 @@ int mp_leading_bit (mp_int * a)
     if (mp_init_copy(&t, a) != MP_OKAY)
         return 0;
 
-    while (mp_iszero(&t) == 0) {
+    while (mp_iszero(&t) == MP_NO) {
 #ifndef MP_8BIT
         bit = (t.dp[0] & 0x80) != 0;
 #else
-        bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
+        bit = ((t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80) != 0;
 #endif
         if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
             break;
@@ -200,6 +290,22 @@ int mp_leading_bit (mp_int * a)
     return bit;
 }
 
+int mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b)
+{
+  int res = 0;
+  while (mp_iszero(t) == MP_NO) {
+#ifndef MP_8BIT
+      b[x++] = (unsigned char) (t->dp[0] & 255);
+#else
+      b[x++] = (unsigned char) (t->dp[0] | ((t->dp[1] & 0x01) << 7));
+#endif
+    if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
+      return res;
+    }
+    res = x;
+  }
+  return res;
+}
 
 /* store in unsigned [big endian] format */
 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
@@ -211,49 +317,62 @@ int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
     return res;
   }
 
-  x = 0;
-  while (mp_iszero (&t) == 0) {
-#ifndef MP_8BIT
-      b[x++] = (unsigned char) (t.dp[0] & 255);
-#else
-      b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
-#endif
-    if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
-      mp_clear (&t);
-      return res;
-    }
+  x = mp_to_unsigned_bin_at_pos(0, &t, b);
+  if (x < 0) {
+    mp_clear(&t);
+    return x;
   }
+
   bn_reverse (b, x);
   mp_clear (&t);
-  return MP_OKAY;
+  return res;
 }
 
+int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
+{
+    int i, len;
+
+    len = mp_unsigned_bin_size(a);
+
+    /* pad front w/ zeros to match length */
+    for (i = 0; i < c - len; i++)
+        b[i] = 0x00;
+    return mp_to_unsigned_bin(a, b + i);
+}
 
 /* creates "a" then copies b into it */
 int mp_init_copy (mp_int * a, mp_int * b)
 {
   int     res;
 
-  if ((res = mp_init (a)) != MP_OKAY) {
+  if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
     return res;
   }
-  return mp_copy (b, a);
+
+  if((res = mp_copy (b, a)) != MP_OKAY) {
+    mp_clear(a);
+  }
+
+  return res;
 }
 
 
 /* copy, b = a */
-int
-mp_copy (mp_int * a, mp_int * b)
+int mp_copy (mp_int * a, mp_int * b)
 {
   int     res, n;
 
+  /* Safeguard against passing in a null pointer */
+  if (a == NULL || b == NULL)
+    return MP_VAL;
+
   /* if dst == src do nothing */
   if (a == b) {
     return MP_OKAY;
   }
 
   /* grow dest */
-  if (b->alloc < a->used) {
+  if (b->alloc < a->used || b->alloc == 0) {
      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
         return res;
      }
@@ -261,7 +380,7 @@ mp_copy (mp_int * a, mp_int * b)
 
   /* zero b and copy the parameters over */
   {
-    register mp_digit *tmpa, *tmpb;
+    mp_digit *tmpa, *tmpb;
 
     /* pointer aliases */
 
@@ -277,7 +396,7 @@ mp_copy (mp_int * a, mp_int * b)
     }
 
     /* clear high digits */
-    for (; n < b->used; n++) {
+    for (; n < b->used && b->dp; n++) {
       *tmpb++ = 0;
     }
   }
@@ -296,7 +415,7 @@ int mp_grow (mp_int * a, int size)
   mp_digit *tmp;
 
   /* if the alloc size is smaller alloc more ram */
-  if (a->alloc < size) {
+  if (a->alloc < size || size == 0) {
     /* ensure there are always at least MP_PREC digits extra on top */
     size += (MP_PREC * 2) - (size % MP_PREC);
 
@@ -306,8 +425,8 @@ int mp_grow (mp_int * a, int size)
      * in case the operation failed we don't want
      * to overwrite the dp member of a.
      */
-    tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
-                                       DYNAMIC_TYPE_BIGINT);
+    tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
+                                                           DYNAMIC_TYPE_BIGINT);
     if (tmp == NULL) {
       /* reallocation failed but "a" is still valid [can be freed] */
       return MP_MEM;
@@ -327,25 +446,6 @@ int mp_grow (mp_int * a, int size)
 }
 
 
-/* reverse an array, used for radix code */
-void
-bn_reverse (unsigned char *s, int len)
-{
-  int     ix, iy;
-  unsigned char t;
-
-  ix = 0;
-  iy = len - 1;
-  while (ix < iy) {
-    t     = s[ix];
-    s[ix] = s[iy];
-    s[iy] = t;
-    ++ix;
-    --iy;
-  }
-}
-
-
 /* shift right by a certain bit count (store quotient in c, optional
    remainder in d) */
 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
@@ -406,6 +506,9 @@ void mp_zero (mp_int * a)
   int       n;
   mp_digit *tmp;
 
+  if (a == NULL)
+      return;
+
   a->sign = MP_ZPOS;
   a->used = 0;
 
@@ -419,12 +522,11 @@ void mp_zero (mp_int * a)
 /* trim unused digits
  *
  * This is used to ensure that leading zero digits are
- * trimed and the leading "used" digit will be non-zero
+ * trimmed and the leading "used" digit will be non-zero
  * Typically very fast.  Also fixes the sign if there
  * are no more leading digits
  */
-void
-mp_clamp (mp_int * a)
+void mp_clamp (mp_int * a)
 {
   /* decrease used while the most significant digit is
    * zero.
@@ -443,8 +545,7 @@ mp_clamp (mp_int * a)
 /* swap the elements of two integers, for cases where you can't simply swap the
  * mp_int pointers around
  */
-void
-mp_exch (mp_int * a, mp_int * b)
+void mp_exch (mp_int * a, mp_int * b)
 {
   mp_int  t;
 
@@ -457,10 +558,12 @@ mp_exch (mp_int * a, mp_int * b)
 /* shift right a certain number of bits */
 void mp_rshb (mp_int *c, int x)
 {
-    register mp_digit *tmpc, mask, shift;
+    mp_digit *tmpc, mask, shift;
     mp_digit r, rr;
     mp_digit D = x;
 
+    if (mp_iszero(c)) return;
+
     /* mask */
     mask = (((mp_digit)1) << D) - 1;
 
@@ -483,6 +586,7 @@ void mp_rshb (mp_int *c, int x)
       /* set the carry to the carry bits of the current word found above */
       r = rr;
     }
+    mp_clamp(c);
 }
 
 
@@ -503,7 +607,7 @@ void mp_rshd (mp_int * a, int b)
   }
 
   {
-    register mp_digit *bottom, *top;
+    mp_digit *bottom, *top;
 
     /* shift the digits down */
 
@@ -539,8 +643,7 @@ void mp_rshd (mp_int * a, int b)
 
 
 /* calc a value mod 2**b */
-int
-mp_mod_2d (mp_int * a, int b, mp_int * c)
+int mp_mod_2d (mp_int * a, int b, mp_int * c)
 {
   int     x, res;
 
@@ -637,8 +740,8 @@ int mp_mul_2d (mp_int * a, int b, mp_int * c)
   /* shift any bit count < DIGIT_BIT */
   d = (mp_digit) (b % DIGIT_BIT);
   if (d != 0) {
-    register mp_digit *tmpc, shift, mask, r, rr;
-    register int x;
+    mp_digit *tmpc, shift, mask, r, rr;
+    int x;
 
     /* bitmask for carries */
     mask = (((mp_digit)1) << d) - 1;
@@ -656,7 +759,7 @@ int mp_mul_2d (mp_int * a, int b, mp_int * c)
       rr = (*tmpc >> shift) & mask;
 
       /* shift the current word and OR in the carry */
-      *tmpc = ((*tmpc << d) | r) & MP_MASK;
+      *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK);
       ++tmpc;
 
       /* set the carry to the carry bits of the current word */
@@ -691,7 +794,7 @@ int mp_lshd (mp_int * a, int b)
   }
 
   {
-    register mp_digit *top, *bottom;
+    mp_digit *top, *bottom;
 
     /* increment the used by the shift amount then copy upwards */
     a->used += b;
@@ -703,7 +806,7 @@ int mp_lshd (mp_int * a, int b)
     bottom = a->dp + a->used - 1 - b;
 
     /* much like mp_rshd this is implemented using a sliding window
-     * except the window goes the otherway around.  Copying from
+     * except the window goes the other way around.  Copying from
      * the bottom to the top.  see bn_mp_rshd.c for more info.
      */
     for (x = a->used - 1; x >= b; x--) {
@@ -722,17 +825,33 @@ int mp_lshd (mp_int * a, int b)
 
 /* this is a shell function that calls either the normal or Montgomery
  * exptmod functions.  Originally the call to the montgomery code was
- * embedded in the normal function but that wasted alot of stack space
+ * embedded in the normal function but that wasted a lot of stack space
  * for nothing (since 99% of the time the Montgomery code would be called)
  */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+#else
 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+#endif
 {
   int dr;
 
   /* modulus P must be positive */
-  if (P->sign == MP_NEG) {
+  if (mp_iszero(P) || P->sign == MP_NEG) {
      return MP_VAL;
   }
+  if (mp_isone(P)) {
+     mp_set(Y, 0);
+     return MP_OKAY;
+  }
+  if (mp_iszero(X)) {
+     mp_set(Y, 1);
+     return MP_OKAY;
+  }
+  if (mp_iszero(G)) {
+     mp_set(Y, 0);
+     return MP_OKAY;
+  }
 
   /* if exponent X is negative we have to recurse */
   if (X->sign == MP_NEG) {
@@ -771,6 +890,12 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
 #endif
   }
 
+#ifdef BN_MP_EXPTMOD_BASE_2
+  if (G->used == 1 && G->dp[0] == 2) {
+    return mp_exptmod_base_2(X, P, Y);
+  }
+#endif
+
 /* modified diminished radix reduction */
 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
   defined(BN_S_MP_EXPTMOD_C)
@@ -787,6 +912,8 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
   dr = 0;
 #endif
 
+  (void)dr;
+
 #ifdef BN_MP_REDUCE_IS_2K_C
   /* if not, is it a unrestricted DR modulus? */
   if (dr == 0) {
@@ -796,7 +923,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
 
   /* if the modulus is odd or dr != 0 use the montgomery method */
 #ifdef BN_MP_EXPTMOD_FAST_C
-  if (mp_isodd (P) == 1 || dr !=  0) {
+  if (mp_isodd (P) == MP_YES || dr !=  0) {
     return mp_exptmod_fast (G, X, P, Y, dr);
   } else {
 #endif
@@ -812,13 +939,17 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
 #endif
 }
 
+int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
+{
+    (void)digits;
+    return mp_exptmod(G, X, P, Y);
+}
 
 /* b = |a|
  *
  * Simple function copies the input and fixes the sign to positive
  */
-int
-mp_abs (mp_int * a, mp_int * b)
+int mp_abs (mp_int * a, mp_int * b)
 {
   int     res;
 
@@ -837,22 +968,28 @@ mp_abs (mp_int * a, mp_int * b)
 
 
 /* hac 14.61, pp608 */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
+#else
 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+#endif
 {
-  /* b cannot be negative */
-  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
+  /* b cannot be negative or zero, and can not divide by 0 (1/a mod b) */
+  if (b->sign == MP_NEG || mp_iszero(b) == MP_YES || mp_iszero(a) == MP_YES) {
     return MP_VAL;
   }
 
 #ifdef BN_FAST_MP_INVMOD_C
   /* if the modulus is odd we can use a faster routine instead */
-  if (mp_isodd (b) == 1) {
+  if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
     return fast_mp_invmod (a, b, c);
   }
 #endif
 
 #ifdef BN_MP_INVMOD_SLOW_C
   return mp_invmod_slow(a, b, c);
+#else
+  return MP_VAL;
 #endif
 }
 
@@ -869,7 +1006,7 @@ int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
   int     res, neg, loop_check = 0;
 
   /* 2. [modified] b must be odd   */
-  if (mp_iseven (b) == 1) {
+  if (mp_iseven (b) == MP_YES) {
     return MP_VAL;
   }
 
@@ -895,17 +1032,19 @@ int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
     goto LBL_ERR;
   }
-  mp_set (&D, 1);
+  if ((res = mp_set (&D, 1)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
 
 top:
   /* 4.  while u is even do */
-  while (mp_iseven (&u) == 1) {
+  while (mp_iseven (&u) == MP_YES) {
     /* 4.1 u = u/2 */
     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
       goto LBL_ERR;
     }
     /* 4.2 if B is odd then */
-    if (mp_isodd (&B) == 1) {
+    if (mp_isodd (&B) == MP_YES) {
       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
         goto LBL_ERR;
       }
@@ -917,13 +1056,13 @@ top:
   }
 
   /* 5.  while v is even do */
-  while (mp_iseven (&v) == 1) {
+  while (mp_iseven (&v) == MP_YES) {
     /* 5.1 v = v/2 */
     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
       goto LBL_ERR;
     }
     /* 5.2 if D is odd then */
-    if (mp_isodd (&D) == 1) {
+    if (mp_isodd (&D) == MP_YES) {
       /* D = (D-x)/2 */
       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
         goto LBL_ERR;
@@ -957,8 +1096,8 @@ top:
   }
 
   /* if not zero goto step 4 */
-  if (mp_iszero (&u) == 0) {
-    if (++loop_check > 1024) {
+  if (mp_iszero (&u) == MP_NO) {
+    if (++loop_check > MAX_INVMOD_SZ) {
         res = MP_VAL;
         goto LBL_ERR;
     }
@@ -980,6 +1119,12 @@ top:
       goto LBL_ERR;
     }
   }
+  /* too big */
+  while (mp_cmp_mag(&D, b) != MP_LT) {
+      if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+  }
   mp_exch (&D, c);
   c->sign = neg;
   res = MP_OKAY;
@@ -1001,31 +1146,42 @@ int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
   int     res;
 
   /* b cannot be negative */
-  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
+  if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
     return MP_VAL;
   }
 
   /* init temps */
   if ((res = mp_init_multi(&x, &y, &u, &v,
                            &A, &B)) != MP_OKAY) {
-     return res;
+    return res;
   }
 
   /* init rest of tmps temps */
   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
-     return res;
+    mp_clear(&x);
+    mp_clear(&y);
+    mp_clear(&u);
+    mp_clear(&v);
+    mp_clear(&A);
+    mp_clear(&B);
+    return res;
   }
 
   /* x = a, y = b */
   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
-      goto LBL_ERR;
+    goto LBL_ERR;
+  }
+  if (mp_isone(&x)) {
+    mp_set(c, 1);
+    res = MP_OKAY;
+    goto LBL_ERR;
   }
   if ((res = mp_copy (b, &y)) != MP_OKAY) {
     goto LBL_ERR;
   }
 
   /* 2. [modified] if x,y are both even then return an error! */
-  if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
+  if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
     res = MP_VAL;
     goto LBL_ERR;
   }
@@ -1037,24 +1193,28 @@ int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
     goto LBL_ERR;
   }
-  mp_set (&A, 1);
-  mp_set (&D, 1);
+  if ((res = mp_set (&A, 1)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
+  if ((res = mp_set (&D, 1)) != MP_OKAY) {
+    goto LBL_ERR;
+  }
 
 top:
   /* 4.  while u is even do */
-  while (mp_iseven (&u) == 1) {
+  while (mp_iseven (&u) == MP_YES) {
     /* 4.1 u = u/2 */
     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
       goto LBL_ERR;
     }
     /* 4.2 if A or B is odd then */
-    if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
+    if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
       /* A = (A+y)/2, B = (B-x)/2 */
       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
     }
     /* A = A/2, B = B/2 */
@@ -1067,19 +1227,19 @@ top:
   }
 
   /* 5.  while v is even do */
-  while (mp_iseven (&v) == 1) {
+  while (mp_iseven (&v) == MP_YES) {
     /* 5.1 v = v/2 */
     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
       goto LBL_ERR;
     }
     /* 5.2 if C or D is odd then */
-    if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
+    if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
       /* C = (C+y)/2, D = (D-x)/2 */
       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
-         goto LBL_ERR;
+        goto LBL_ERR;
       }
     }
     /* C = C/2, D = D/2 */
@@ -1121,7 +1281,7 @@ top:
   }
 
   /* if not zero goto step 4 */
-  if (mp_iszero (&u) == 0)
+  if (mp_iszero (&u) == MP_NO)
     goto top;
 
   /* now a = C, b = D, gcd == g*v */
@@ -1161,7 +1321,7 @@ LBL_ERR:mp_clear(&x);
 }
 
 
-/* compare maginitude of two ints (unsigned) */
+/* compare magnitude of two ints (unsigned) */
 int mp_cmp_mag (mp_int * a, mp_int * b)
 {
   int     n;
@@ -1197,8 +1357,7 @@ int mp_cmp_mag (mp_int * a, mp_int * b)
 
 
 /* compare two ints (signed)*/
-int
-mp_cmp (mp_int * a, mp_int * b)
+int mp_cmp (mp_int * a, mp_int * b)
 {
   /* compare based on sign */
   if (a->sign != b->sign) {
@@ -1222,8 +1381,12 @@ mp_cmp (mp_int * a, mp_int * b)
 /* compare a digit */
 int mp_cmp_d(mp_int * a, mp_digit b)
 {
+  /* special case for zero*/
+  if (a->used == 0 && b == 0)
+    return MP_EQ;
+
   /* compare based on sign */
-  if (a->sign == MP_NEG) {
+  if ((b && a->used == 0) || a->sign == MP_NEG) {
     return MP_LT;
   }
 
@@ -1244,22 +1407,38 @@ int mp_cmp_d(mp_int * a, mp_digit b)
 
 
 /* set to a digit */
-void mp_set (mp_int * a, mp_digit b)
+int mp_set (mp_int * a, mp_digit b)
 {
+  int res;
   mp_zero (a);
-  a->dp[0] = b & MP_MASK;
-  a->used  = (a->dp[0] != 0) ? 1 : 0;
+  res = mp_grow (a, 1);
+  if (res == MP_OKAY) {
+    a->dp[0] = (mp_digit)(b & MP_MASK);
+    a->used  = (a->dp[0] != 0) ? 1 : 0;
+  }
+  return res;
 }
 
+/* check if a bit is set */
+int mp_is_bit_set (mp_int *a, mp_digit b)
+{
+    if ((mp_digit)a->used < b/DIGIT_BIT)
+        return 0;
+
+    return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
+}
 
 /* c = a mod b, 0 <= c < b */
-int
-mp_mod (mp_int * a, mp_int * b, mp_int * c)
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
+#else
+int mp_mod (mp_int * a, mp_int * b, mp_int * c)
+#endif
 {
   mp_int  t;
   int     res;
 
-  if ((res = mp_init (&t)) != MP_OKAY) {
+  if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
     return res;
   }
 
@@ -1268,11 +1447,11 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c)
     return res;
   }
 
-  if (t.sign != b->sign) {
-    res = mp_add (b, &t, c);
-  } else {
+  if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {
     res = MP_OKAY;
     mp_exch (&t, c);
+  } else {
+    res = mp_add (b, &t, c);
   }
 
   mp_clear (&t);
@@ -1287,7 +1466,7 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
    int    res, n, n2;
 
   /* is divisor zero ? */
-  if (mp_iszero (b) == 1) {
+  if (mp_iszero (b) == MP_YES) {
     return MP_VAL;
   }
 
@@ -1309,8 +1488,9 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
      return res;
   }
 
-
-  mp_set(&tq, 1);
+  if ((res = mp_set(&tq, 1)) != MP_OKAY) {
+     return res;
+  }
   n = mp_count_bits(a) - mp_count_bits(b);
   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
       ((res = mp_abs(b, &tb)) != MP_OKAY) ||
@@ -1367,7 +1547,7 @@ int mp_div_2(mp_int * a, mp_int * b)
   oldused = b->used;
   b->used = a->used;
   {
-    register mp_digit r, rr, *tmpa, *tmpb;
+    mp_digit r, rr, *tmpa, *tmpb;
 
     /* source alias */
     tmpa = a->dp + b->used - 1;
@@ -1403,7 +1583,7 @@ int mp_div_2(mp_int * a, mp_int * b)
 /* high level addition (handles signs) */
 int mp_add (mp_int * a, mp_int * b, mp_int * c)
 {
-  int     sa, sb, res;
+  int sa, sb, res;
 
   /* get sign of both inputs */
   sa = a->sign;
@@ -1433,39 +1613,38 @@ int mp_add (mp_int * a, mp_int * b, mp_int * c)
 
 
 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
-int
-s_mp_add (mp_int * a, mp_int * b, mp_int * c)
+int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
 {
   mp_int *x;
-  int     olduse, res, min, max;
+  int     olduse, res, min_ab, max_ab;
 
   /* find sizes, we let |a| <= |b| which means we have to sort
    * them.  "x" will point to the input with the most digits
    */
   if (a->used > b->used) {
-    min = b->used;
-    max = a->used;
+    min_ab = b->used;
+    max_ab = a->used;
     x = a;
   } else {
-    min = a->used;
-    max = b->used;
+    min_ab = a->used;
+    max_ab = b->used;
     x = b;
   }
 
   /* init result */
-  if (c->alloc < max + 1) {
-    if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
+  if (c->alloc < max_ab + 1) {
+    if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
       return res;
     }
   }
 
   /* get old used digit count and set new one */
   olduse = c->used;
-  c->used = max + 1;
+  c->used = max_ab + 1;
 
   {
-    register mp_digit u, *tmpa, *tmpb, *tmpc;
-    register int i;
+    mp_digit u, *tmpa, *tmpb, *tmpc;
+    int i;
 
     /* alias for digit pointers */
 
@@ -1480,7 +1659,7 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
 
     /* zero the carry */
     u = 0;
-    for (i = 0; i < min; i++) {
+    for (i = 0; i < min_ab; i++) {
       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
       *tmpc = *tmpa++ + *tmpb++ + u;
 
@@ -1494,8 +1673,8 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
     /* now copy higher words if any, that is in A+B
      * if A or B has more digits add those in
      */
-    if (min != max) {
-      for (; i < max; i++) {
+    if (min_ab != max_ab) {
+      for (; i < max_ab; i++) {
         /* T[i] = X[i] + U */
         *tmpc = x->dp[i] + u;
 
@@ -1510,7 +1689,7 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
     /* add carry */
     *tmpc++ = u;
 
-    /* clear digits above oldused */
+    /* clear digits above olduse */
     for (i = c->used; i < olduse; i++) {
       *tmpc++ = 0;
     }
@@ -1522,27 +1701,31 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
 
 
 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
-int
-s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
+int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
 {
-  int     olduse, res, min, max;
+  int     olduse, res, min_b, max_a;
 
   /* find sizes */
-  min = b->used;
-  max = a->used;
+  min_b = b->used;
+  max_a = a->used;
 
   /* init result */
-  if (c->alloc < max) {
-    if ((res = mp_grow (c, max)) != MP_OKAY) {
+  if (c->alloc < max_a) {
+    if ((res = mp_grow (c, max_a)) != MP_OKAY) {
       return res;
     }
   }
+
+  /* sanity check on destination */
+  if (c->dp == NULL)
+     return MP_VAL;
+
   olduse = c->used;
-  c->used = max;
+  c->used = max_a;
 
   {
-    register mp_digit u, *tmpa, *tmpb, *tmpc;
-    register int i;
+    mp_digit u, *tmpa, *tmpb, *tmpc;
+    int i;
 
     /* alias for digit pointers */
     tmpa = a->dp;
@@ -1551,7 +1734,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
 
     /* set carry to zero */
     u = 0;
-    for (i = 0; i < min; i++) {
+    for (i = 0; i < min_b; i++) {
       /* T[i] = A[i] - B[i] - U */
       *tmpc = *tmpa++ - *tmpb++ - u;
 
@@ -1567,7 +1750,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
     }
 
     /* now copy higher words if any, e.g. if A has more digits than B  */
-    for (; i < max; i++) {
+    for (; i < max_a; i++) {
       /* T[i] = A[i] - U */
       *tmpc = *tmpa++ - u;
 
@@ -1590,8 +1773,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
 
 
 /* high level subtraction (handles signs) */
-int
-mp_sub (mp_int * a, mp_int * b, mp_int * c)
+int mp_sub (mp_int * a, mp_int * b, mp_int * c)
 {
   int     sa, sb, res;
 
@@ -1725,7 +1907,7 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
   mp_digit buf, mp;
   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
 #ifdef WOLFSSL_SMALL_STACK
-  mp_int* M = NULL;
+  mp_int* M;
 #else
   mp_int M[TAB_SIZE];
 #endif
@@ -1733,11 +1915,11 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
    * one of many reduction algorithms without modding the guts of
    * the code with if statements everywhere.
    */
-  int     (*redux)(mp_int*,mp_int*,mp_digit);
+  int     (*redux)(mp_int*,mp_int*,mp_digit) = NULL;
 
 #ifdef WOLFSSL_SMALL_STACK
   M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                                                       DYNAMIC_TYPE_BIGINT);
   if (M == NULL)
     return MP_MEM;
 #endif
@@ -1768,9 +1950,9 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
 
   /* init M array */
   /* init first cell */
-  if ((err = mp_init(&M[1])) != MP_OKAY) {
+  if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+     XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
      return err;
@@ -1778,14 +1960,14 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
 
   /* now init the second half of the array */
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
-    if ((err = mp_init(&M[x])) != MP_OKAY) {
+    if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
       for (y = 1<<(winsize-1); y < x; y++) {
         mp_clear (&M[y]);
       }
       mp_clear(&M[1]);
 
 #ifdef WOLFSSL_SMALL_STACK
-      XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+      XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
       return err;
@@ -1807,7 +1989,7 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
      /* automatically pick the comba one if available (saves quite a few
         calls/ifs) */
 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
-     if (((P->used * 2 + 1) < MP_WARRAY) &&
+     if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
         redux = fast_mp_montgomery_reduce;
      } else
@@ -1816,9 +1998,6 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
 #ifdef BN_MP_MONTGOMERY_REDUCE_C
         /* use slower baseline Montgomery method */
         redux = mp_montgomery_reduce;
-#else
-        err = MP_VAL;
-        goto LBL_M;
 #endif
      }
   } else if (redmode == 1) {
@@ -1826,9 +2005,6 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
      /* setup DR reduction for moduli of the form B**k - b */
      mp_dr_setup(P, &mp);
      redux = mp_dr_reduce;
-#else
-     err = MP_VAL;
-     goto LBL_M;
 #endif
   } else {
 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
@@ -1837,14 +2013,16 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
         goto LBL_M;
      }
      redux = mp_reduce_2k;
-#else
+#endif
+  }
+
+  if (redux == NULL) {
      err = MP_VAL;
      goto LBL_M;
-#endif
   }
 
   /* setup result */
-  if ((err = mp_init (&res)) != MP_OKAY) {
+  if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
     goto LBL_M;
   }
 
@@ -1861,17 +2039,19 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
        goto LBL_RES;
      }
-#else
-     err = MP_VAL;
-     goto LBL_RES;
-#endif
 
      /* now set M[1] to G * R mod m */
      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
        goto LBL_RES;
      }
+#else
+     err = MP_VAL;
+     goto LBL_RES;
+#endif
   } else {
-     mp_set(&res, 1);
+     if ((err = mp_set(&res, 1)) != MP_OKAY) {
+        goto LBL_RES;
+     }
      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
         goto LBL_RES;
      }
@@ -1883,7 +2063,8 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
   }
 
   for (x = 0; x < (winsize - 1); x++) {
-    if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
+    if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
+                       &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
       goto LBL_RES;
     }
     if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
@@ -2024,16 +2205,179 @@ LBL_M:
   }
 
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+  return err;
+}
+
+#ifdef BN_MP_EXPTMOD_BASE_2
+#if DIGIT_BIT < 16
+    #define WINSIZE    3
+#elif DIGIT_BIT < 32
+    #define WINSIZE    4
+#elif DIGIT_BIT < 64
+    #define WINSIZE    5
+#elif DIGIT_BIT < 128
+    #define WINSIZE    6
+#endif
+int mp_exptmod_base_2(mp_int * X, mp_int * P, mp_int * Y)
+{
+  mp_digit buf, mp;
+  int      err = MP_OKAY, bitbuf, bitcpy, bitcnt, digidx, x, y;
+#ifdef WOLFSSL_SMALL_STACK
+  mp_int  *res = NULL;
+#else
+  mp_int   res[1];
+#endif
+  int     (*redux)(mp_int*,mp_int*,mp_digit) = NULL;
+
+  /* automatically pick the comba one if available (saves quite a few
+     calls/ifs) */
+#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
+  if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
+       P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+     redux = fast_mp_montgomery_reduce;
+  } else
 #endif
+  {
+#ifdef BN_MP_MONTGOMERY_REDUCE_C
+     /* use slower baseline Montgomery method */
+     redux = mp_montgomery_reduce;
+#else
+     return MP_VAL;
+#endif
+  }
+
+#ifdef WOLFSSL_SMALL_STACK
+  res = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (res == NULL) {
+     return MP_MEM;
+  }
+#endif
+
+  /* now setup montgomery  */
+  if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
+     goto LBL_M;
+  }
+
+  /* setup result */
+  if ((err = mp_init(res)) != MP_OKAY) {
+     goto LBL_M;
+  }
+
+  /* now we need R mod m */
+  if ((err = mp_montgomery_calc_normalization(res, P)) != MP_OKAY) {
+     goto LBL_RES;
+  }
+
+  /* Get the top bits left over after taking WINSIZE bits starting at the
+   * least-significant.
+   */
+  digidx = X->used - 1;
+  bitcpy = (X->used * DIGIT_BIT) % WINSIZE;
+  if (bitcpy > 0) {
+     bitcnt = (int)DIGIT_BIT - bitcpy;
+     buf    = X->dp[digidx--];
+     bitbuf = (int)(buf >> bitcnt);
+     /* Multiply montgomery representation of 1 by 2 ^ top */
+     err = mp_mul_2d(res, bitbuf, res);
+     if (err != MP_OKAY) {
+        goto LBL_RES;
+     }
+     err = mp_mod(res, P, res);
+     if (err != MP_OKAY) {
+        goto LBL_RES;
+     }
+     /* Move out bits used */
+     buf  <<= bitcpy;
+     bitcnt++;
+  }
+  else {
+     bitcnt = 1;
+     buf    = 0;
+  }
+
+  /* empty window and reset  */
+  bitbuf = 0;
+  bitcpy = 0;
+
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      /* if digidx == -1 we are out of digits so break */
+      if (digidx == -1) {
+        break;
+      }
+      /* read next digit and reset bitcnt */
+      buf    = X->dp[digidx--];
+      bitcnt = (int)DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent */
+    y       = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+    buf   <<= (mp_digit)1;
+    /* add bit to the window */
+    bitbuf |= (y << (WINSIZE - ++bitcpy));
+
+    if (bitcpy == WINSIZE) {
+      /* ok window is filled so square as required and multiply  */
+      /* square first */
+      for (x = 0; x < WINSIZE; x++) {
+        err = mp_sqr(res, res);
+        if (err != MP_OKAY) {
+          goto LBL_RES;
+        }
+        err = (*redux)(res, P, mp);
+        if (err != MP_OKAY) {
+          goto LBL_RES;
+        }
+      }
+
+      /* then multiply by 2^bitbuf */
+      err = mp_mul_2d(res, bitbuf, res);
+      if (err != MP_OKAY) {
+         goto LBL_RES;
+      }
+      err = mp_mod(res, P, res);
+      if (err != MP_OKAY) {
+         goto LBL_RES;
+      }
+
+      /* empty window and reset */
+      bitcpy = 0;
+      bitbuf = 0;
+    }
+  }
+
+  /* fixup result if Montgomery reduction is used
+   * recall that any value in a Montgomery system is
+   * actually multiplied by R mod n.  So we have
+   * to reduce one more time to cancel out the factor
+   * of R.
+   */
+  err = (*redux)(res, P, mp);
+  if (err != MP_OKAY) {
+     goto LBL_RES;
+  }
+
+  /* swap res with Y */
+  mp_copy(res, Y);
 
+LBL_RES:mp_clear (res);
+LBL_M:
+#ifdef WOLFSSL_SMALL_STACK
+  XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
   return err;
 }
 
+#undef WINSIZE
+#endif /* BN_MP_EXPTMOD_BASE_2 */
+
 
 /* setups the montgomery reduction stuff */
-int
-mp_montgomery_setup (mp_int * n, mp_digit * rho)
+int mp_montgomery_setup (mp_int * n, mp_digit * rho)
 {
   mp_digit x, b;
 
@@ -2065,7 +2409,7 @@ mp_montgomery_setup (mp_int * n, mp_digit * rho)
 
   /* rho = -1/m mod b */
   /* TAO, switched mp_word casts to mp_digit to shut up compiler */
-  *rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
+  *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
 
   return MP_OKAY;
 }
@@ -2099,7 +2443,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
   }
 
 #ifdef WOLFSSL_SMALL_STACK
-  W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+  W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
   if (W == NULL)
     return MP_MEM;
 #endif
@@ -2108,8 +2452,8 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
    * an array of double precision words W[...]
    */
   {
-    register mp_word *_W;
-    register mp_digit *tmpx;
+    mp_word *_W;
+    mp_digit *tmpx;
 
     /* alias for the W[] array */
     _W   = W;
@@ -2138,13 +2482,13 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
      * by casting the value down to a mp_digit.  Note this requires
      * that W[ix-1] have  the carry cleared (see after the inner loop)
      */
-    register mp_digit mu;
+    mp_digit mu;
     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
 
     /* a = a + mu * m * b**i
      *
      * This is computed in place and on the fly.  The multiplication
-     * by b**i is handled by offseting which columns the results
+     * by b**i is handled by offsetting which columns the results
      * are added to.
      *
      * Note the comba method normally doesn't handle carries in the
@@ -2156,9 +2500,9 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
      * first m->used words of W[] have the carries fixed
      */
     {
-      register int iy;
-      register mp_digit *tmpn;
-      register mp_word *_W;
+      int iy;
+      mp_digit *tmpn;
+      mp_word *_W;
 
       /* alias for the digits of the modulus */
       tmpn = n->dp;
@@ -2181,8 +2525,8 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
    * significant digits we zeroed].
    */
   {
-    register mp_digit *tmpx;
-    register mp_word *_W, *_W1;
+    mp_digit *tmpx;
+    mp_word *_W, *_W1;
 
     /* nox fix rest of carries */
 
@@ -2213,7 +2557,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
     }
 
-    /* zero oldused digits, if the input a was larger than
+    /* zero olduse digits, if the input a was larger than
      * m->used+1 we'll have to clear the digits
      */
     for (; ix < olduse; ix++) {
@@ -2226,7 +2570,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
   mp_clamp (x);
 
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+  XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
   /* if A >= m then A = A - m */
@@ -2238,8 +2582,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 
 
 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
-int
-mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
+int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 {
   int     ix, res, digs;
   mp_digit mu;
@@ -2251,7 +2594,7 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
    * are fixed up in the inner loop.
    */
   digs = n->used * 2 + 1;
-  if ((digs < MP_WARRAY) &&
+  if ((digs < (int)MP_WARRAY) &&
       n->used <
       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
     return fast_mp_montgomery_reduce (x, n, rho);
@@ -2278,9 +2621,9 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 
     /* a = a + mu * m * b**i */
     {
-      register int iy;
-      register mp_digit *tmpn, *tmpx, u;
-      register mp_word r;
+      int iy;
+      mp_digit *tmpn, *tmpx, u;
+      mp_word r;
 
       /* alias for digits of the modulus */
       tmpn = n->dp;
@@ -2360,8 +2703,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d)
  *
  * Input x must be in the range 0 <= x <= (n-1)**2
  */
-int
-mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
+int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
 {
   int      err, i, m;
   mp_word  r;
@@ -2413,7 +2755,9 @@ top:
    * Each successive "recursion" makes the input smaller and smaller.
    */
   if (mp_cmp_mag (x, n) != MP_LT) {
-    s_mp_sub(x, n, x);
+    if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
+        return err;
+    }
     goto top;
   }
   return MP_OKAY;
@@ -2450,7 +2794,9 @@ top:
    }
 
    if (mp_cmp_mag(a, n) != MP_LT) {
-      s_mp_sub(a, n, a);
+      if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
+         goto ERR;
+      }
       goto top;
    }
 
@@ -2487,37 +2833,49 @@ int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
 }
 
 
-/* computes a = 2**b
- *
- * Simple algorithm which zeroes the int, grows it then just sets one bit
- * as required.
- */
-int
-mp_2expt (mp_int * a, int b)
+/* set the b bit of a */
+int mp_set_bit (mp_int * a, int b)
 {
-  int     res;
+    int i = b / DIGIT_BIT, res;
 
-  /* zero a as per default */
-  mp_zero (a);
+    /*
+     * Require:
+     *  bit index b >= 0
+     *  a->alloc == a->used == 0 if a->dp == NULL
+     */
+    if (b < 0 || (a->dp == NULL && (a->alloc != 0 || a->used != 0)))
+        return MP_VAL;
 
-  /* grow a to accomodate the single bit */
-  if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
-    return res;
-  }
+    if (a->dp == NULL || a->used < (int)(i + 1)) {
+        /* grow a to accommodate the single bit */
+        if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
+            return res;
+        }
 
-  /* set the used count of where the bit will go */
-  a->used = b / DIGIT_BIT + 1;
+        /* set the used count of where the bit will go */
+        a->used = (int)(i + 1);
+    }
 
-  /* put the single bit in its place */
-  a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
+    /* put the single bit in its place */
+    a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT);
 
-  return MP_OKAY;
+    return MP_OKAY;
 }
 
+/* computes a = 2**b
+ *
+ * Simple algorithm which zeros the int, set the required bit
+ */
+int mp_2expt (mp_int * a, int b)
+{
+    /* zero a as per default */
+    mp_zero (a);
+
+    return mp_set_bit(a, b);
+}
 
 /* multiply by a digit */
-int
-mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
+int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
 {
   mp_digit u, *tmpa, *tmpc;
   mp_word  r;
@@ -2575,35 +2933,78 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
 
 
 /* d = a * b (mod c) */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
+#else
 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+#endif
 {
   int     res;
   mp_int  t;
 
-  if ((res = mp_init (&t)) != MP_OKAY) {
+  if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
     return res;
   }
 
-  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
-    mp_clear (&t);
+  res = mp_mul (a, b, &t);
+  if (res == MP_OKAY) {
+      res = mp_mod (&t, c, d);
+  }
+
+  mp_clear (&t);
+  return res;
+}
+
+
+/* d = a - b (mod c) */
+int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
+{
+  int     res;
+  mp_int  t;
+
+  if ((res = mp_init (&t)) != MP_OKAY) {
     return res;
   }
-  res = mp_mod (&t, c, d);
+
+  res = mp_sub (a, b, &t);
+  if (res == MP_OKAY) {
+      res = mp_mod (&t, c, d);
+  }
+
   mp_clear (&t);
+
   return res;
 }
 
+/* d = a + b (mod c) */
+int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
+{
+   int     res;
+   mp_int  t;
+
+   if ((res = mp_init (&t)) != MP_OKAY) {
+     return res;
+   }
+
+   res = mp_add (a, b, &t);
+   if (res == MP_OKAY) {
+       res = mp_mod (&t, c, d);
+   }
+
+   mp_clear (&t);
+
+   return res;
+}
 
 /* computes b = a*a */
-int
-mp_sqr (mp_int * a, mp_int * b)
+int mp_sqr (mp_int * a, mp_int * b)
 {
   int     res;
 
   {
 #ifdef BN_FAST_S_MP_SQR_C
     /* can we use the fast comba multiplier? */
-    if ((a->used * 2 + 1) < MP_WARRAY &&
+    if ((a->used * 2 + 1) < (int)MP_WARRAY &&
          a->used <
          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
       res = fast_s_mp_sqr (a, b);
@@ -2621,12 +3022,17 @@ mp_sqr (mp_int * a, mp_int * b)
 
 
 /* high level multiplication (handles sign) */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mul(mp_int *a, mp_int *b, mp_int *c)
+#else
 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
+#endif
 {
   int     res, neg;
   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
 
   {
+#ifdef BN_FAST_S_MP_MUL_DIGS_C
     /* can we use the fast multiplier?
      *
      * The fast multiplier can be used if the output will
@@ -2635,8 +3041,7 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
      */
     int     digs = a->used + b->used + 1;
 
-#ifdef BN_FAST_S_MP_MUL_DIGS_C
-    if ((digs < MP_WARRAY) &&
+    if ((digs < (int)MP_WARRAY) &&
         MIN(a->used, b->used) <=
         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
       res = fast_s_mp_mul_digs (a, b, c, digs);
@@ -2659,7 +3064,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
 {
   int     x, res, oldused;
 
-  /* grow to accomodate result */
+  /* grow to accommodate result */
   if (b->alloc < a->used + 1) {
     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
       return res;
@@ -2670,7 +3075,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
   b->used = a->used;
 
   {
-    register mp_digit r, rr, *tmpa, *tmpb;
+    mp_digit r, rr, *tmpa, *tmpb;
 
     /* alias for source */
     tmpa = a->dp;
@@ -2688,7 +3093,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
 
       /* now shift up this digit, add in the carry [from the previous] */
-      *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
+      *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
 
       /* copy the carry that would be from the source
        * digit into the next iteration
@@ -2717,8 +3122,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
 
 
 /* divide by three (based on routine from MPI and the GMP manual) */
-int
-mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
+int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
 {
   mp_int   q;
   mp_word  w, t;
@@ -2783,7 +3187,7 @@ int mp_init_size (mp_int * a, int size)
   size += (MP_PREC * 2) - (size % MP_PREC);
 
   /* alloc mem */
-  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
+  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
                                       DYNAMIC_TYPE_BIGINT);
   if (a->dp == NULL) {
     return MP_MEM;
@@ -2793,6 +3197,9 @@ int mp_init_size (mp_int * a, int size)
   a->used  = 0;
   a->alloc = size;
   a->sign  = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_init(&a->raw);
+#endif
 
   /* zero the digits */
   for (x = 0; x < size; x++) {
@@ -2832,11 +3239,11 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
     }
   }
 
-  if (pa > MP_WARRAY)
+  if (pa > (int)MP_WARRAY)
     return MP_RANGE;  /* TAO range check */
 
 #ifdef WOLFSSL_SMALL_STACK
-  W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+  W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
   if (W == NULL)
     return MP_MEM;
 #endif
@@ -2859,7 +3266,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
       tmpx = a->dp + tx;
       tmpy = a->dp + ty;
 
-      /* this is the number of times the loop will iterrate, essentially
+      /* this is the number of times the loop will iterate, essentially
          while (tx++ < a->used && ty-- >= 0) { ... }
        */
       iy = MIN(a->used-tx, ty+1);
@@ -2898,7 +3305,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
     mp_digit *tmpb;
     tmpb = b->dp;
     for (ix = 0; ix < pa; ix++) {
-      *tmpb++ = W[ix] & MP_MASK;
+      *tmpb++ = (mp_digit)(W[ix] & MP_MASK);
     }
 
     /* clear unused digits [that existed in the old copy of c] */
@@ -2909,7 +3316,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
   mp_clamp (b);
 
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+  XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
   return MP_OKAY;
@@ -2940,7 +3347,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 #else
   mp_digit W[MP_WARRAY];
 #endif
-  register mp_word  _W;
+  mp_word  _W;
 
   /* grow the destination as required */
   if (c->alloc < digs) {
@@ -2951,11 +3358,11 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 
   /* number of output digits to produce */
   pa = MIN(digs, a->used + b->used);
-  if (pa > MP_WARRAY)
+  if (pa > (int)MP_WARRAY)
     return MP_RANGE;  /* TAO range check */
 
 #ifdef WOLFSSL_SMALL_STACK
-  W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+  W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
   if (W == NULL)
     return MP_MEM;
 #endif
@@ -2975,7 +3382,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       tmpx = a->dp + tx;
       tmpy = b->dp + ty;
 
-      /* this is the number of times the loop will iterrate, essentially
+      /* this is the number of times the loop will iterate, essentially
          while (tx++ < a->used && ty-- >= 0) { ... }
        */
       iy = MIN(a->used-tx, ty+1);
@@ -2987,7 +3394,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       }
 
       /* store term */
-      W[ix] = ((mp_digit)_W) & MP_MASK;
+      W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
 
       /* make next carry */
       _W = _W >> ((mp_word)DIGIT_BIT);
@@ -2998,9 +3405,9 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   c->used = pa;
 
   {
-    register mp_digit *tmpc;
+    mp_digit *tmpc;
     tmpc = c->dp;
-    for (ix = 0; ix < pa+1; ix++) {
+    for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
       /* now extract the previous digit [below the carry] */
       *tmpc++ = W[ix];
     }
@@ -3013,7 +3420,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   mp_clamp (c);
 
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+  XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
   return MP_OKAY;
@@ -3084,7 +3491,7 @@ int s_mp_sqr (mp_int * a, mp_int * b)
 }
 
 
-/* multiplies |a| * |b| and only computes upto digs digits of result
+/* multiplies |a| * |b| and only computes up to digs digits of result
  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
  * many digits of output are created.
  */
@@ -3097,7 +3504,7 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   mp_digit tmpx, *tmpt, *tmpy;
 
   /* can we use the fast multiplier? */
-  if (((digs) < MP_WARRAY) &&
+  if ((digs < (int)MP_WARRAY) &&
       MIN (a->used, b->used) <
           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
     return fast_s_mp_mul_digs (a, b, c, digs);
@@ -3157,8 +3564,8 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 /*
  * shifts with subtractions when the result is greater than b.
  *
- * The method is slightly modified to shift B unconditionally upto just under
- * the leading bit of b.  This saves alot of multiple precision shifting.
+ * The method is slightly modified to shift B unconditionally up to just under
+ * the leading bit of b.  This saves a lot of multiple precision shifting.
  */
 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
 {
@@ -3168,15 +3575,17 @@ int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
   bits = mp_count_bits (b) % DIGIT_BIT;
 
   if (b->used > 1) {
-     if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
+     if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
+         != MP_OKAY) {
         return res;
      }
   } else {
-     mp_set(a, 1);
+     if ((res = mp_set(a, 1)) != MP_OKAY) {
+        return res;
+     }
      bits = 1;
   }
 
-
   /* now compute C = A * B mod b */
   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
@@ -3312,7 +3721,9 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
   if ((err = mp_init (&res)) != MP_OKAY) {
     goto LBL_MU;
   }
-  mp_set (&res, 1);
+  if ((err = mp_set (&res, 1)) != MP_OKAY) {
+    goto LBL_MU;
+  }
 
   /* set initial mode and bit cnt */
   mode   = 0;
@@ -3427,7 +3838,7 @@ LBL_M:
 
 
 /* pre-calculate the value required for Barrett reduction
- * For a given modulus "b" it calulates the value required in "a"
+ * For a given modulus "b" it calculates the value required in "a"
  */
 int mp_reduce_setup (mp_int * a, mp_int * b)
 {
@@ -3499,7 +3910,8 @@ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
 
   /* If x < 0, add b**(k+1) to it */
   if (mp_cmp_d (x, 0) == MP_LT) {
-    mp_set (&q, 1);
+    if ((res = mp_set (&q, 1)) != MP_OKAY)
+        goto CLEANUP;
     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
       goto CLEANUP;
     if ((res = mp_add (x, &q, x)) != MP_OKAY)
@@ -3551,7 +3963,9 @@ top:
    }
 
    if (mp_cmp_mag(a, n) != MP_LT) {
-      s_mp_sub(a, n, a);
+      if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
+         goto ERR;
+      }
       goto top;
    }
 
@@ -3588,8 +4002,7 @@ ERR:
 /* multiplies |a| * |b| and does not compute the lower digs digits
  * [meant to get the higher part of the product]
  */
-int
-s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
   mp_int  t;
   int     res, pa, pb, ix, iy;
@@ -3599,8 +4012,9 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 
   /* can we use the fast multiplier? */
 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
-  if (((a->used + b->used + 1) < MP_WARRAY)
-      && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+  if (((a->used + b->used + 1) < (int)MP_WARRAY)
+      && MIN (a->used, b->used) <
+      (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
     return fast_s_mp_mul_high_digs (a, b, c, digs);
   }
 #endif
@@ -3612,7 +4026,7 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 
   pa = a->used;
   pb = b->used;
-  for (ix = 0; ix < pa; ix++) {
+  for (ix = 0; ix < pa && a->dp; ix++) {
     /* clear the carry */
     u = 0;
 
@@ -3665,6 +4079,10 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 #endif
   mp_word  _W;
 
+  if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
+      return MP_VAL;
+  }
+
   /* grow the destination as required */
   pa = a->used + b->used;
   if (c->alloc < pa) {
@@ -3673,11 +4091,11 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
     }
   }
 
-  if (pa > MP_WARRAY)
+  if (pa > (int)MP_WARRAY)
     return MP_RANGE;  /* TAO range check */
 
 #ifdef WOLFSSL_SMALL_STACK
-  W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+  W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
   if (W == NULL)
     return MP_MEM;
 #endif
@@ -3685,7 +4103,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   /* number of output digits to produce */
   pa = a->used + b->used;
   _W = 0;
-  for (ix = digs; ix < pa; ix++) {
+  for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
       int      tx, ty, iy;
       mp_digit *tmpx, *tmpy;
 
@@ -3697,7 +4115,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       tmpx = a->dp + tx;
       tmpy = b->dp + ty;
 
-      /* this is the number of times the loop will iterrate, essentially its
+      /* this is the number of times the loop will iterate, essentially its
          while (tx++ < a->used && ty-- >= 0) { ... }
        */
       iy = MIN(a->used-tx, ty+1);
@@ -3708,7 +4126,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       }
 
       /* store term */
-      W[ix] = ((mp_digit)_W) & MP_MASK;
+      W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
 
       /* make next carry */
       _W = _W >> ((mp_word)DIGIT_BIT);
@@ -3719,10 +4137,10 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   c->used = pa;
 
   {
-    register mp_digit *tmpc;
+    mp_digit *tmpc;
 
     tmpc = c->dp + digs;
-    for (ix = digs; ix <= pa; ix++) {
+    for (ix = digs; ix < pa; ix++) {   /* TAO, <= could potentially overwrite */
       /* now extract the previous digit [below the carry] */
       *tmpc++ = W[ix];
     }
@@ -3735,32 +4153,40 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   mp_clamp (c);
 
 #ifdef WOLFSSL_SMALL_STACK
-  XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+  XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
 #endif
 
   return MP_OKAY;
 }
 
 
-/* set a 32-bit const */
+#ifndef MP_SET_CHUNK_BITS
+    #define MP_SET_CHUNK_BITS 4
+#endif
 int mp_set_int (mp_int * a, unsigned long b)
 {
-  int     x, res;
+  int x, res;
+
+  /* use direct mp_set if b is less than mp_digit max */
+  if (b < MP_DIGIT_MAX) {
+    return mp_set (a, (mp_digit)b);
+  }
 
   mp_zero (a);
 
-  /* set four bits at a time */
-  for (x = 0; x < 8; x++) {
-    /* shift the number up four bits */
-    if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
+  /* set chunk bits at a time */
+  for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
+    /* shift the number up chunk bits */
+    if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
       return res;
     }
 
-    /* OR in the top four bits of the source */
-    a->dp[0] |= (b >> 28) & 15;
+    /* OR in the top bits of the source */
+    a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
+                                  ((1 << MP_SET_CHUNK_BITS) - 1);
 
-    /* shift the source up to the next four bits */
-    b <<= 4;
+    /* shift the source up to the next chunk bits */
+    b <<= MP_SET_CHUNK_BITS;
 
     /* ensure that digits are not clamped off */
     a->used += 1;
@@ -3770,7 +4196,8 @@ int mp_set_int (mp_int * a, unsigned long b)
 }
 
 
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC)
+#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC) || !defined(NO_RSA) || \
+    !defined(NO_DSA) | !defined(NO_DH)
 
 /* c = a * a (mod b) */
 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
@@ -3794,7 +4221,10 @@ int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
 #endif
 
 
-#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN)
+#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
+    defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
+    defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING) || \
+    (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
 
 /* single digit addition */
 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
@@ -3854,7 +4284,7 @@ int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
         *tmpc++ &= MP_MASK;
      }
      /* set final carry */
-     if (mu != 0 && ix < c->alloc) {
+     if (ix < c->alloc) {
         ix++;
         *tmpc++  = mu;
      }
@@ -3961,7 +4391,9 @@ int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
 #endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
 
 
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
+#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC) || \
+    defined(DEBUG_WOLFSSL) || !defined(NO_RSA) || !defined(NO_DSA) || \
+    !defined(NO_DH)
 
 static const int lnz[16] = {
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
@@ -3971,16 +4403,17 @@ static const int lnz[16] = {
 int mp_cnt_lsb(mp_int *a)
 {
     int x;
-    mp_digit q, qq;
+    mp_digit q = 0, qq;
 
     /* easy out */
-    if (mp_iszero(a) == 1) {
+    if (mp_iszero(a) == MP_YES) {
         return 0;
     }
 
     /* scan lower digits until non-zero */
-    for (x = 0; x < a->used && a->dp[x] == 0; x++);
-    q = a->dp[x];
+    for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
+    if (a->dp)
+        q = a->dp[x];
     x *= DIGIT_BIT;
 
     /* now scan this digit until a 1 is found */
@@ -4021,7 +4454,7 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
   mp_int  q;
   mp_word w;
   mp_digit t;
-  int     res, ix;
+  int     res = MP_OKAY, ix;
 
   /* cannot divide by zero */
   if (b == 0) {
@@ -4029,7 +4462,7 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
   }
 
   /* quick outs */
-  if (b == 1 || mp_iszero(a) == 1) {
+  if (b == 1 || mp_iszero(a) == MP_YES) {
      if (d != NULL) {
         *d = 0;
      }
@@ -4058,12 +4491,21 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
 #endif
 
   /* no easy answer [c'est la vie].  Just division */
-  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
-     return res;
+  if (c != NULL) {
+      if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
+         return res;
+      }
+
+      q.used = a->used;
+      q.sign = a->sign;
+  }
+  else {
+      if ((res = mp_init(&q)) != MP_OKAY) {
+         return res;
+      }
   }
 
-  q.used = a->used;
-  q.sign = a->sign;
+
   w = 0;
   for (ix = a->used - 1; ix >= 0; ix--) {
      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
@@ -4074,7 +4516,8 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
       } else {
         t = 0;
       }
-      q.dp[ix] = (mp_digit)t;
+      if (c != NULL)
+        q.dp[ix] = (mp_digit)t;
   }
 
   if (d != NULL) {
@@ -4096,11 +4539,11 @@ int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
   return mp_div_d(a, b, NULL, c);
 }
 
-#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
+#endif /* WOLFSSL_KEY_GEN || HAVE_COMP_KEY || HAVE_ECC || DEBUG_WOLFSSL */
 
-#ifdef WOLFSSL_KEY_GEN
+#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA)
 
-const mp_digit ltm_prime_tab[] = {
+const mp_digit ltm_prime_tab[PRIME_SIZE] = {
   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
@@ -4189,9 +4632,30 @@ static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
   if ((err = mp_init (&y)) != MP_OKAY) {
     goto LBL_R;
   }
-  if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
-    goto LBL_Y;
-  }
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+  if (mp_count_bits(a) == 1024)
+      err = sp_ModExp_1024(b, &r, a, &y);
+  else if (mp_count_bits(a) == 2048)
+      err = sp_ModExp_2048(b, &r, a, &y);
+  else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+  if (mp_count_bits(a) == 1536)
+      err = sp_ModExp_1536(b, &r, a, &y);
+  else if (mp_count_bits(a) == 3072)
+      err = sp_ModExp_3072(b, &r, a, &y);
+  else
+#endif
+#ifdef WOLFSSL_SP_4096
+  if (mp_count_bits(a) == 4096)
+      err = sp_ModExp_4096(b, &r, a, &y);
+  else
+#endif
+#endif
+      err = mp_exptmod (b, &r, a, &y);
+  if (err != MP_OKAY)
+      goto LBL_Y;
 
   /* if y != 1 and y != n1 do */
   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
@@ -4254,7 +4718,6 @@ static int mp_prime_is_divisible (mp_int * a, int *result)
   return MP_OKAY;
 }
 
-
 /*
  * Sets result to 1 if probably prime, 0 otherwise
  */
@@ -4271,10 +4734,15 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
     return MP_VAL;
   }
 
+  if (mp_isone(a)) {
+      *result = MP_NO;
+      return MP_OKAY;
+  }
+
   /* is the input equal to one of the primes in the table? */
   for (ix = 0; ix < PRIME_SIZE; ix++) {
       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
-         *result = 1;
+         *result = MP_YES;
          return MP_OKAY;
       }
   }
@@ -4296,7 +4764,9 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
 
   for (ix = 0; ix < t; ix++) {
     /* set the prime */
-    mp_set (&b, ltm_prime_tab[ix]);
+    if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
+        goto LBL_B;
+    }
 
     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
       goto LBL_B;
@@ -4314,6 +4784,177 @@ LBL_B:mp_clear (&b);
 }
 
 
+/*
+ * Sets result to 1 if probably prime, 0 otherwise
+ */
+int mp_prime_is_prime_ex (mp_int * a, int t, int *result, WC_RNG *rng)
+{
+  mp_int  b, c;
+  int     ix, err, res;
+  byte*   base = NULL;
+  word32  baseSz = 0;
+
+  /* default to no */
+  *result = MP_NO;
+
+  /* valid value of t? */
+  if (t <= 0 || t > PRIME_SIZE) {
+    return MP_VAL;
+  }
+
+  if (mp_isone(a)) {
+    *result = MP_NO;
+    return MP_OKAY;
+  }
+
+  /* is the input equal to one of the primes in the table? */
+  for (ix = 0; ix < PRIME_SIZE; ix++) {
+      if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
+         *result = MP_YES;
+         return MP_OKAY;
+      }
+  }
+
+  /* first perform trial division */
+  if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
+    return err;
+  }
+
+  /* return if it was trivially divisible */
+  if (res == MP_YES) {
+    return MP_OKAY;
+  }
+
+  /* now perform the miller-rabin rounds */
+  if ((err = mp_init (&b)) != MP_OKAY) {
+    return err;
+  }
+  if ((err = mp_init (&c)) != MP_OKAY) {
+      mp_clear(&b);
+    return err;
+  }
+
+  baseSz = mp_count_bits(a);
+  baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
+
+  base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (base == NULL) {
+      err = MP_MEM;
+      goto LBL_B;
+  }
+
+  if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
+      goto LBL_B;
+  }
+
+ /* now do a miller rabin with up to t random numbers, this should
+  * give a (1/4)^t chance of a false prime. */
+  for (ix = 0; ix < t; ix++) {
+    /* Set a test candidate. */
+    if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
+        goto LBL_B;
+    }
+
+    if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
+        goto LBL_B;
+    }
+
+    if (mp_cmp_d(&b, 2) != MP_GT || mp_cmp(&b, &c) != MP_LT) {
+        ix--;
+        continue;
+    }
+
+    if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
+      goto LBL_B;
+    }
+
+    if (res == MP_NO) {
+      goto LBL_B;
+    }
+  }
+
+  /* passed the test */
+  *result = MP_YES;
+LBL_B:mp_clear (&b);
+      mp_clear (&c);
+      XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  return err;
+}
+
+#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
+
+#ifdef WOLFSSL_KEY_GEN
+
+static const int USE_BBS = 1;
+
+int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
+{
+    int   err, res, type;
+    byte* buf;
+
+    if (N == NULL || rng == NULL)
+        return MP_VAL;
+
+    /* get type */
+    if (len < 0) {
+        type = USE_BBS;
+        len = -len;
+    } else {
+        type = 0;
+    }
+
+    /* allow sizes between 2 and 512 bytes for a prime size */
+    if (len < 2 || len > 512) {
+        return MP_VAL;
+    }
+
+    /* allocate buffer to work with */
+    buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
+    if (buf == NULL) {
+        return MP_MEM;
+    }
+    XMEMSET(buf, 0, len);
+
+    do {
+#ifdef SHOW_GEN
+        printf(".");
+        fflush(stdout);
+#endif
+        /* generate value */
+        err = wc_RNG_GenerateBlock(rng, buf, len);
+        if (err != 0) {
+            XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+            return err;
+        }
+
+        /* munge bits */
+        buf[0]     |= 0x80 | 0x40;
+        buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
+
+        /* load value */
+        if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
+            XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+            return err;
+        }
+
+        /* test */
+        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+         * of a 1024-bit candidate being a false positive, when it is our
+         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+         * Using 8 because we've always used 8. */
+        if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
+            XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+            return err;
+        }
+    } while (res == MP_NO);
+
+    XMEMSET(buf, 0, len);
+    XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+
+    return MP_OKAY;
+}
+
+
 /* computes least common multiple as |a*b|/(a, b) */
 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
 {
@@ -4411,7 +5052,7 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
         }
     }
 
-    while (mp_iszero(&v) == 0) {
+    while (mp_iszero(&v) == MP_NO) {
         /* make sure v is the largest */
         if (mp_cmp_mag(&u, &v) == MP_GT) {
             /* swap u and v to make sure v is >= u */
@@ -4435,21 +5076,24 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
     }
     c->sign = MP_ZPOS;
     res = MP_OKAY;
-LBL_V:mp_clear (&u);
-LBL_U:mp_clear (&v);
+LBL_V:mp_clear (&v);
+LBL_U:mp_clear (&u);
     return res;
 }
 
-
-
 #endif /* WOLFSSL_KEY_GEN */
 
 
-#ifdef HAVE_ECC
+#if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
+    defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
+    defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA)
 
 /* chars used in radix conversions */
-const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
+                         abcdefghijklmnopqrstuvwxyz+/";
+#endif
 
+#if !defined(NO_DSA) || defined(HAVE_ECC)
 /* read a string [ASCII] in a given radix */
 int mp_read_radix (mp_int * a, const char *str, int radix)
 {
@@ -4460,7 +5104,7 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
   mp_zero(a);
 
   /* make sure the radix is ok */
-  if (radix < 2 || radix > 64) {
+  if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
     return MP_VAL;
   }
 
@@ -4478,12 +5122,12 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
   mp_zero (a);
 
   /* process each digit of the string */
-  while (*str) {
-    /* if the radix < 36 the conversion is case insensitive
+  while (*str != '\0') {
+    /* if the radix <= 36 the conversion is case insensitive
      * this allows numbers like 1AB and 1ab to represent the same  value
      * [e.g. in hex]
      */
-    ch = (char) ((radix < 36) ? XTOUPPER((unsigned char)*str) : *str);
+    ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)*str) : *str;
     for (y = 0; y < 64; y++) {
       if (ch == mp_s_rmap[y]) {
          break;
@@ -4507,16 +5151,170 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
     ++str;
   }
 
+  /* if digit in isn't null term, then invalid character was found */
+  if (*str != '\0') {
+     mp_zero (a);
+     return MP_VAL;
+  }
+
   /* set the sign only if a != 0 */
-  if (mp_iszero(a) != 1) {
+  if (mp_iszero(a) != MP_YES) {
      a->sign = neg;
   }
   return MP_OKAY;
 }
+#endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
+
+#ifdef WC_MP_TO_RADIX
+
+/* returns size of ASCII representation */
+int mp_radix_size (mp_int *a, int radix, int *size)
+{
+    int     res, digs;
+    mp_int  t;
+    mp_digit d;
+
+    *size = 0;
+
+    /* special case for binary */
+    if (radix == MP_RADIX_BIN) {
+        *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
+        return MP_OKAY;
+    }
+
+    /* make sure the radix is in range */
+    if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
+        return MP_VAL;
+    }
+
+    if (mp_iszero(a) == MP_YES) {
+        *size = 2;
+        return MP_OKAY;
+    }
+
+    /* digs is the digit count */
+    digs = 0;
+
+    /* if it's negative add one for the sign */
+    if (a->sign == MP_NEG) {
+        ++digs;
+    }
+
+    /* init a copy of the input */
+    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+        return res;
+    }
+
+    /* force temp to positive */
+    t.sign = MP_ZPOS;
+
+    /* fetch out all of the digits */
+    while (mp_iszero (&t) == MP_NO) {
+        if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+            mp_clear (&t);
+            return res;
+        }
+        ++digs;
+    }
+    mp_clear (&t);
+
+    /* return digs + 1, the 1 is for the NULL byte that would be required. */
+    *size = digs + 1;
+    return MP_OKAY;
+}
+
+/* stores a bignum as a ASCII string in a given radix (2..64) */
+int mp_toradix (mp_int *a, char *str, int radix)
+{
+    int     res, digs;
+    mp_int  t;
+    mp_digit d;
+    char   *_s = str;
+
+    /* check range of the radix */
+    if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
+        return MP_VAL;
+    }
+
+    /* quick out if its zero */
+    if (mp_iszero(a) == MP_YES) {
+        *str++ = '0';
+        *str = '\0';
+        return MP_OKAY;
+    }
+
+    if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+        return res;
+    }
+
+    /* if it is negative output a - */
+    if (t.sign == MP_NEG) {
+        ++_s;
+        *str++ = '-';
+        t.sign = MP_ZPOS;
+    }
+
+    digs = 0;
+    while (mp_iszero (&t) == MP_NO) {
+        if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+            mp_clear (&t);
+            return res;
+        }
+        *str++ = mp_s_rmap[d];
+        ++digs;
+    }
+#ifndef WC_DISABLE_RADIX_ZERO_PAD
+    /* For hexadecimal output, add zero padding when number of digits is odd */
+    if ((digs & 1) && (radix == 16)) {
+        *str++ = mp_s_rmap[0];
+        ++digs;
+    }
+#endif
+    /* reverse the digits of the string.  In this case _s points
+     * to the first digit [excluding the sign] of the number]
+     */
+    bn_reverse ((unsigned char *)_s, digs);
+
+    /* append a NULL so the string is properly terminated */
+    *str = '\0';
+
+    mp_clear (&t);
+    return MP_OKAY;
+}
+
+#ifdef WOLFSSL_DEBUG_MATH
+void mp_dump(const char* desc, mp_int* a, byte verbose)
+{
+  char *buffer;
+  int size = a->alloc;
+
+  buffer = (char*)XMALLOC(size * sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+  if (buffer == NULL) {
+    return;
+  }
+
+  printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
+    desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
+
+  mp_tohex(a, buffer);
+  printf("  %s\n  ", buffer);
+
+  if (verbose) {
+    int i;
+    for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
+      printf("%02x ", *(((byte*)a->dp) + i));
+    }
+    printf("\n");
+  }
+
+  XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+}
+#endif /* WOLFSSL_DEBUG_MATH */
 
-#endif /* HAVE_ECC */
+#endif /* WC_MP_TO_RADIX */
+
+#endif /* WOLFSSL_SP_MATH */
 
 #endif /* USE_FAST_MATH */
 
 #endif /* NO_BIG_INT */
-