diff options
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpvalpha.c')
-rw-r--r-- | security/nss/lib/freebl/mpi/mpvalpha.c | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c new file mode 100644 index 000000000..5118e73c3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpvalpha.c @@ -0,0 +1,214 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Multiple Precision Integer optimization code for + * the Compaq Alpha processor. + * + * The Initial Developer of the Original Code is + * Richard C. Swift. + * Portions created by the Initial Developer are Copyright (C) 2001 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): Richard C. Swift (swift@netscape.com) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "mpi-priv.h" +#include <c_asm.h> + + +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { Plo = asm ("mulq %a0, %a1, %v0", a, b); \ + Phi = asm ("umulh %a0, %a1, %v0", a, b); } \ + +/* This is empty for the loop in s_mpv_mul_d */ +#define CARRY_ADD + +#define ONE_MUL \ + a_i = *a++; \ + MP_MUL_DxD(a_i, b, a1b1, a0b0); \ + a0b0 += carry; \ + if (a0b0 < carry) \ + ++a1b1; \ + CARRY_ADD \ + *c++ = a0b0; \ + carry = a1b1; \ + +#define FOUR_MUL \ + ONE_MUL \ + ONE_MUL \ + ONE_MUL \ + ONE_MUL \ + +#define SIXTEEN_MUL \ + FOUR_MUL \ + FOUR_MUL \ + FOUR_MUL \ + FOUR_MUL \ + +#define THIRTYTWO_MUL \ + SIXTEEN_MUL \ + SIXTEEN_MUL \ + +#define ONETWENTYEIGHT_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + + +#define EXPAND_256(CALL) \ + mp_digit carry = 0; \ + mp_digit a_i; \ + mp_digit a0b0, a1b1; \ + if (a_len &255) { \ + if (a_len &1) { \ + ONE_MUL \ + } \ + if (a_len &2) { \ + ONE_MUL \ + ONE_MUL \ + } \ + if (a_len &4) { \ + FOUR_MUL \ + } \ + if (a_len &8) { \ + FOUR_MUL \ + FOUR_MUL \ + } \ + if (a_len & 16 ) { \ + SIXTEEN_MUL \ + } \ + if (a_len & 32 ) { \ + THIRTYTWO_MUL \ + } \ + if (a_len & 64 ) { \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + } \ + if (a_len & 128) { \ + ONETWENTYEIGHT_MUL \ + } \ + a_len = a_len & (-256); \ + } \ + if (a_len>=256 ) { \ + carry = CALL(a, a_len, b, c, carry); \ + c += a_len; \ + } \ + +#define FUNC_NAME(NAME) \ +mp_digit NAME(const mp_digit *a, \ + mp_size a_len, \ + mp_digit b, mp_digit *c, \ + mp_digit carry) \ + +#define DECLARE_MUL_256(FNAME) \ +FUNC_NAME(FNAME) \ +{ \ + mp_digit a_i; \ + mp_digit a0b0, a1b1; \ + while (a_len) { \ + ONETWENTYEIGHT_MUL \ + ONETWENTYEIGHT_MUL \ + a_len-= 256; \ + } \ + return carry; \ +} \ + +/* Expanding the loop in s_mpv_mul_d appeared to slow down the + (admittedly) small number of tests (i.e., timetest) used to + measure performance, so this define disables that optimization. */ +#define DO_NOT_EXPAND 1 + +/* Need forward declaration so it can be instantiated after + the routine that uses it; this helps locality somewhat */ +#if !defined(DO_NOT_EXPAND) +FUNC_NAME(s_mpv_mul_d_MUL256); +#endif + +/* c = a * b */ +void s_mpv_mul_d(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ +#if defined(DO_NOT_EXPAND) + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } +#else + EXPAND_256(s_mpv_mul_d_MUL256) +#endif + *c = carry; +} + +#if !defined(DO_NOT_EXPAND) +DECLARE_MUL_256(s_mpv_mul_d_MUL256) +#endif + +#undef CARRY_ADD +/* This is redefined for the loop in s_mpv_mul_d_add */ +#define CARRY_ADD \ + a0b0 += a_i = *c; \ + if (a0b0 < a_i) \ + ++a1b1; \ + +/* Need forward declaration so it can be instantiated between the + two routines that use it; this helps locality somewhat */ +FUNC_NAME(s_mpv_mul_d_add_MUL256); + +/* c += a * b */ +void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + EXPAND_256(s_mpv_mul_d_add_MUL256) + *c = carry; +} + +/* Instantiate multiply 256 routine here */ +DECLARE_MUL_256(s_mpv_mul_d_add_MUL256) + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b */ +void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + EXPAND_256(s_mpv_mul_d_add_MUL256) + while (carry) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = carry < c_i; + } +} + |