1 files changed, 214 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 000000000..5118e73c3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,214 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Multiple Precision Integer optimization code for 
+ * the Compaq Alpha processor.
+ *
+ * The Initial Developer of the Original Code is
+ * Richard C. Swift.
+ * Portions created by the Initial Developer are Copyright (C) 2001
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):	Richard C. Swift	(swift@netscape.com)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+
+
+#define MP_MUL_DxD(a, b, Phi, Plo)		\
+ { Plo = asm ("mulq %a0, %a1, %v0", a, b);	\
+   Phi = asm ("umulh %a0, %a1, %v0", a, b); }	\
+
+/* This is empty for the loop in s_mpv_mul_d	*/
+#define CARRY_ADD
+
+#define ONE_MUL				\
+    a_i = *a++;				\
+    MP_MUL_DxD(a_i, b, a1b1, a0b0);	\
+    a0b0 += carry;			\
+    if (a0b0 < carry)			\
+      ++a1b1;				\
+    CARRY_ADD				\
+    *c++ = a0b0;			\
+    carry = a1b1;			\
+
+#define FOUR_MUL			\
+	ONE_MUL				\
+	ONE_MUL				\
+	ONE_MUL				\
+	ONE_MUL				\
+
+#define SIXTEEN_MUL			\
+	FOUR_MUL			\
+	FOUR_MUL			\
+	FOUR_MUL			\
+	FOUR_MUL			\
+
+#define THIRTYTWO_MUL			\
+	SIXTEEN_MUL			\
+	SIXTEEN_MUL			\
+
+#define ONETWENTYEIGHT_MUL		\
+	THIRTYTWO_MUL			\
+	THIRTYTWO_MUL			\
+	THIRTYTWO_MUL			\
+	THIRTYTWO_MUL			\
+
+
+#define EXPAND_256(CALL)		\
+ mp_digit carry = 0;			\
+ mp_digit a_i;				\
+ mp_digit a0b0, a1b1;			\
+ if (a_len &255) {			\
+	if (a_len &1) {			\
+	  ONE_MUL			\
+	}				\
+	if (a_len &2) {			\
+	  ONE_MUL			\
+	  ONE_MUL			\
+	}				\
+	if (a_len &4) {			\
+	  FOUR_MUL			\
+	}				\
+	if (a_len &8) {			\
+	  FOUR_MUL			\
+	  FOUR_MUL			\
+	}				\
+	if (a_len & 16 ) {		\
+	  SIXTEEN_MUL			\
+	}				\
+	if (a_len & 32 ) {		\
+	  THIRTYTWO_MUL			\
+	}				\
+	if (a_len & 64 ) {		\
+	  THIRTYTWO_MUL			\
+	  THIRTYTWO_MUL			\
+	}				\
+	if (a_len & 128) {		\
+	  ONETWENTYEIGHT_MUL		\
+	}				\
+	a_len = a_len & (-256);		\
+  }					\
+  if (a_len>=256 ) {			\
+	carry = CALL(a, a_len, b, c, carry);	\
+	c += a_len;			\
+  }					\
+
+#define FUNC_NAME(NAME)			\
+mp_digit NAME(const mp_digit *a, 	\
+	mp_size a_len,			\
+	mp_digit b, mp_digit *c, 	\
+	mp_digit carry)			\
+
+#define DECLARE_MUL_256(FNAME)		\
+FUNC_NAME(FNAME)			\
+{					\
+  mp_digit a_i;				\
+  mp_digit a0b0, a1b1;			\
+  while (a_len) {			\
+	ONETWENTYEIGHT_MUL		\
+	ONETWENTYEIGHT_MUL		\
+	a_len-= 256;			\
+  }					\
+  return carry;				\
+}					\
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow down the
+   (admittedly) small number of tests (i.e., timetest) used to
+   measure performance, so this define disables that optimization. */
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+	the routine that uses it; this helps locality somewhat	*/
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b */
+void s_mpv_mul_d(const mp_digit *a, mp_size a_len, 
+			mp_digit b, mp_digit *c)
+{
+#if defined(DO_NOT_EXPAND)
+  mp_digit carry = 0;
+  while (a_len--) {
+    mp_digit a_i = *a++;
+    mp_digit a0b0, a1b1;
+
+    MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+    a0b0 += carry;
+    if (a0b0 < carry)
+      ++a1b1;
+    *c++ = a0b0;
+    carry = a1b1;
+  }
+#else
+  EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+  *c = carry;
+}
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loop in s_mpv_mul_d_add */
+#define CARRY_ADD			\
+    a0b0 += a_i = *c;			\
+    if (a0b0 < a_i)			\
+      ++a1b1;				\
+
+/* Need forward declaration so it can be instantiated between the
+	two routines that use it; this helps locality somewhat	*/
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c += a * b */
+void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, 
+			mp_digit b, mp_digit *c)
+{
+  EXPAND_256(s_mpv_mul_d_add_MUL256)
+  *c = carry;
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, 
+			mp_digit b, mp_digit *c)
+{
+  EXPAND_256(s_mpv_mul_d_add_MUL256)
+  while (carry) {
+    mp_digit c_i = *c;
+    carry += c_i;
+    *c++ = carry;
+    carry = carry < c_i;
+  }
+}
+