summaryrefslogtreecommitdiff
path: root/crypto/bn/bn_exp2.c
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/bn_exp2.c')
-rw-r--r--crypto/bn/bn_exp2.c202
1 files changed, 202 insertions, 0 deletions
diff --git a/crypto/bn/bn_exp2.c b/crypto/bn/bn_exp2.c
new file mode 100644
index 0000000000..eface739b3
--- /dev/null
+++ b/crypto/bn/bn_exp2.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+/* I've done some timing with different table sizes.
+ * The main hassle is that even with bits set at 3, this requires
+ * 63 BIGNUMs to store the pre-calculated values.
+ * 512 1024
+ * bits=1 75.4% 79.4%
+ * bits=2 61.2% 62.4%
+ * bits=3 61.3% 59.3%
+ * The lack of speed improvment is also a function of the pre-calculation
+ * which could be removed.
+ */
+#define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */
+#define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */
+
+int BN_mod_exp2_mont(rr,a1,p1,a2,p2,m,ctx,in_mont)
+BIGNUM *rr;
+BIGNUM *a1;
+BIGNUM *p1;
+BIGNUM *a2;
+BIGNUM *p2;
+BIGNUM *m;
+BN_CTX *ctx;
+BN_MONT_CTX *in_mont;
+ {
+ int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue;
+ int start=1,ts=0,x,y;
+ BIGNUM *d,*aa1,*aa2,*r;
+ BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE];
+ BN_MONT_CTX *mont=NULL;
+
+ bn_check_top(a1);
+ bn_check_top(p1);
+ bn_check_top(a2);
+ bn_check_top(p2);
+ bn_check_top(m);
+
+ if (!(m->d[0] & 1))
+ {
+ BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
+ return(0);
+ }
+ d= &(ctx->bn[ctx->tos++]);
+ r= &(ctx->bn[ctx->tos++]);
+ bits1=BN_num_bits(p1);
+ bits2=BN_num_bits(p2);
+ if ((bits1 == 0) && (bits2 == 0))
+ {
+ BN_one(r);
+ return(1);
+ }
+ bits=(bits1 > bits2)?bits1:bits2;
+
+ /* If this is not done, things will break in the montgomery
+ * part */
+
+ if (in_mont != NULL)
+ mont=in_mont;
+ else
+ {
+ if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
+ if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
+ }
+
+ BN_init(&(val[0][0]));
+ BN_init(&(val[1][1]));
+ BN_init(&(val[0][1]));
+ BN_init(&(val[1][0]));
+ ts=1;
+ if (BN_ucmp(a1,m) >= 0)
+ {
+ BN_mod(&(val[1][0]),a1,m,ctx);
+ aa1= &(val[1][0]);
+ }
+ else
+ aa1=a1;
+ if (BN_ucmp(a2,m) >= 0)
+ {
+ BN_mod(&(val[0][1]),a2,m,ctx);
+ aa2= &(val[0][1]);
+ }
+ else
+ aa2=a2;
+ if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err;
+ if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err;
+ if (!BN_mod_mul_montgomery(&(val[1][1]),
+ &(val[1][0]),&(val[0][1]),mont,ctx))
+ goto err;
+
+#if 0
+ if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
+ window=1;
+ else if (bits > 250)
+ window=5; /* max size of window */
+ else if (bits >= 120)
+ window=4;
+ else
+ window=3;
+#else
+ window=EXP2_TABLE_BITS;
+#endif
+
+ k=1<<window;
+ for (x=0; x<k; x++)
+ {
+ if (x >= 2)
+ {
+ BN_init(&(val[x][0]));
+ BN_init(&(val[x][1]));
+ if (!BN_mod_mul_montgomery(&(val[x][0]),
+ &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err;
+ if (!BN_mod_mul_montgomery(&(val[x][1]),
+ &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err;
+ }
+ for (y=2; y<k; y++)
+ {
+ BN_init(&(val[x][y]));
+ if (!BN_mod_mul_montgomery(&(val[x][y]),
+ &(val[x][y-1]),&(val[0][1]),mont,ctx))
+ goto err;
+ }
+ }
+ ts=k;
+
+ start=1; /* This is used to avoid multiplication etc
+ * when there is only the value '1' in the
+ * buffer. */
+ xvalue=0; /* The 'x value' of the window */
+ yvalue=0; /* The 'y value' of the window */
+ wstart=bits-1; /* The top bit of the window */
+ wend=0; /* The bottom bit of the window */
+
+ if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
+ for (;;)
+ {
+ xvalue=BN_is_bit_set(p1,wstart);
+ yvalue=BN_is_bit_set(p2,wstart);
+ if (!(xvalue || yvalue))
+ {
+ if (!start)
+ {
+ if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
+ goto err;
+ }
+ wstart--;
+ if (wstart < 0) break;
+ continue;
+ }
+ /* We now have wstart on a 'set' bit, we now need to work out
+ * how bit a window to do. To do this we need to scan
+ * forward until the last set bit before the end of the
+ * window */
+ j=wstart;
+ /* xvalue=BN_is_bit_set(p1,wstart); already set */
+ /* yvalue=BN_is_bit_set(p1,wstart); already set */
+ wend=0;
+ for (i=1; i<window; i++)
+ {
+ if (wstart-i < 0) break;
+ xvalue+=xvalue;
+ xvalue|=BN_is_bit_set(p1,wstart-i);
+ yvalue+=yvalue;
+ yvalue|=BN_is_bit_set(p2,wstart-i);
+ }
+
+ /* i is the size of the current window */
+ /* add the 'bytes above' */
+ if (!start)
+ for (j=0; j<i; j++)
+ {
+ if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
+ goto err;
+ }
+
+ /* wvalue will be an odd number < 2^window */
+ if (xvalue || yvalue)
+ {
+ if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]),
+ mont,ctx)) goto err;
+ }
+
+ /* move the 'window' down further */
+ wstart-=i;
+ start=0;
+ if (wstart < 0) break;
+ }
+ BN_from_montgomery(rr,r,mont,ctx);
+ ret=1;
+err:
+ if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
+ ctx->tos-=2;
+ for (i=0; i<ts; i++)
+ {
+ for (j=0; j<ts; j++)
+ {
+ BN_clear_free(&(val[i][j]));
+ }
+ }
+ return(ret);
+ }