MIPS assembler code to optimize inner multiply loops for mips3 CPUs.

author: nelsonb%netscape.com <devnull@localhost> 2000-08-22 00:55:10 +0000
committer: nelsonb%netscape.com <devnull@localhost> 2000-08-22 00:55:10 +0000
commit: 43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2 (patch)
tree: 6db835422464f62cbe76adb771e1632f15cbb97b /security/nss/lib/freebl/mpi/mpi_mips.s
parent: 8e5db081096ef5ba35c5bea39eaed233f53da43d (diff)
download: nss-hg-43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2.tar.gz
1 files changed, 433 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s
new file mode 100644
index 000000000..18d4ff870
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_mips.s
@@ -0,0 +1,433 @@
+/*
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ * 
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ * 
+ * The Original Code is the Netscape security libraries.
+ * 
+ * The Initial Developer of the Original Code is Netscape
+ * Communications Corporation.	Portions created by Netscape are 
+ * Copyright (C) 2000 Netscape Communications Corporation.  All
+ * Rights Reserved.
+ * 
+ * Contributor(s):
+ * 
+ * Alternatively, the contents of this file may be used under the
+ * terms of the GNU General Public License Version 2 or later (the
+ * "GPL"), in which case the provisions of the GPL are applicable 
+ * instead of those above.	If you wish to allow use of your 
+ * version of this file only under the terms of the GPL and not to
+ * allow others to use your version of this file under the MPL,
+ * indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by
+ * the GPL.  If you do not delete the provisions above, a recipient
+ * may use your version of this file under either the MPL or the
+ * GPL.
+ *  $Id$
+ */
+#include <regdef.h>
+        .set    noreorder
+        .set    noat
+
+        .section        .text, 1, 0x00000006, 4, 4
+.text:
+        .section        .text
+
+        .ent    s_mpv_mul_d_add
+        .globl  s_mpv_mul_d_add
+
+s_mpv_mul_d_add: 
+ #/* c += a * b */
+ #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, 
+ #			      mp_digit *c)
+ #
+ # Multiplies each of the a_len 32-bit digits of a by b, adds the
+ # products into c[0..a_len-1] with carry propagation, and stores
+ # (not adds) the final carry in c[a_len].  Returns at once when
+ # a_len is 0.
+ #
+ # Inputs:   a0 = a, a1 = a_len, a2 = b, a3 = c
+ # Modifies: a0-a7, t0-t3, HI/LO
+ #
+ # This file is assembled with .set noreorder: the instruction that
+ # follows each branch or jump is its delay slot and always executes.
+ # At least two instructions separate every mflo from the next dmultu
+ # (presumably for the HI/LO hazard of early MIPS III parts).
+ #
+ # Digits of a are loaded only after the length test that proves they
+ # exist, so nothing at or beyond a[a_len] is ever read.  Every a[i]
+ # is loaded before c[i] is stored.
+ # NOTE(review): pointers are advanced with 32-bit addiu, which
+ # presumably ties this code to the n32 ABI - confirm before reuse.
+ #{
+ #  mp_digit   a0, a1;	regs a4, a5
+ #  mp_digit   c0, c1;  regs a6, a7
+ #  mp_digit   cy = 0;  reg t2
+ #  mp_word    w0, w1;  regs t0, t1
+ #
+ #  if (a_len) {
+	beq	a1,zero,.L.1
+	move	t2,zero		# (delay slot) cy = 0
+	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
+	dsrl32	a2,a2,0		# This clears the upper 32 bits.
+ #    a0 = a[0];
+	lwu	a4,0(a0)
+ #    w0 = ((mp_word)b * a0);
+	dmultu	a2,a4
+ #    if (--a_len) {
+	addiu	a1,a1,-1
+	beq	a1,zero,.L.2
+ #      while (a_len >= 2) {
+	sltiu	t3,a1,2		# (delay slot) t3 = (a_len < 2)
+	bne	t3,zero,.L.3
+	nop			# (delay slot)
+.L.4:
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# in bounds: a_len >= 2 here
+ #	  a_len -= 2;
+	addiu	a1,a1,-2
+ #	  c0     = c[0];
+	lwu	a6,0(a3)
+ #	  w0    += cy;
+	mflo	t0
+	daddu	t0,t0,t2
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  a0     = a[2];
+	lwu	a4,8(a0)
+ #	  a     += 2;
+	addiu	a0,a0,8
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  w0     = (mp_word)b * a0;
+	dmultu	a2,a4
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+	sltiu	t3,a1,2		# t3 = (a_len < 2)
+	beq	t3,zero,.L.4
+ #	  c     += 2;
+	addiu	a3,a3,8		# (delay slot) wanted on both paths
+ #      }
+.L.3:
+ #      c0       = c[0];
+	lwu	a6,0(a3)
+ #      w0      += cy;
+ #      if (a_len) {
+	mflo	t0
+	beq	a1,zero,.L.5
+	daddu	t0,t0,t2	# (delay slot) w0 += cy
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# in bounds: a_len == 1 here
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c     += 1;
+	b	.L.6
+	addiu	a3,a3,4		# (delay slot)
+ #      } else {
+.L.5:
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  cy     = CARRYOUT(w0);
+	b	.L.6
+	dsrl32	t2,t0,0		# (delay slot)
+ #      }
+ #    } else {
+.L.2:
+ #      c0     = c[0];
+	lwu	a6,0(a3)
+ #      w0    += c0;
+	mflo	t0
+	daddu	t0,t0,a6
+ #      c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #      cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #    }
+.L.6:
+ #    c[1] = cy;
+	jr	ra
+	sw	t2,4(a3)	# (delay slot) store the final carry
+ #  }
+.L.1:
+	jr	ra
+	nop			# (delay slot)
+ #}
+ #
+        .end    s_mpv_mul_d_add
+
+        .ent    s_mpv_mul_d_add_prop
+        .globl  s_mpv_mul_d_add_prop
+
+s_mpv_mul_d_add_prop: 
+ #/* c += a * b */
+ #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, 
+ #			      mp_digit *c)
+ #
+ # Multiplies each of the a_len 32-bit digits of a by b and adds the
+ # products into c[0..a_len-1] with carry propagation.  The final
+ # carry is then added into c[a_len], c[a_len+1], ... until no carry
+ # remains; c must extend far enough to absorb it, since there is no
+ # bound check.  Returns at once when a_len is 0.
+ #
+ # Inputs:   a0 = a, a1 = a_len, a2 = b, a3 = c
+ # Modifies: a0-a7, t0-t3, HI/LO
+ #
+ # This file is assembled with .set noreorder: the instruction that
+ # follows each branch or jump is its delay slot and always executes.
+ # At least two instructions separate every mflo from the next dmultu
+ # (presumably for the HI/LO hazard of early MIPS III parts).
+ #
+ # Digits of a are loaded only after the length test that proves they
+ # exist, so nothing at or beyond a[a_len] is ever read.  Every a[i]
+ # is loaded before c[i] is stored.
+ # NOTE(review): pointers are advanced with 32-bit addiu, which
+ # presumably ties this code to the n32 ABI - confirm before reuse.
+ #{
+ #  mp_digit   a0, a1;	regs a4, a5
+ #  mp_digit   c0, c1;  regs a6, a7
+ #  mp_digit   cy = 0;  reg t2
+ #  mp_word    w0, w1;  regs t0, t1
+ #
+ #  if (a_len) {
+	beq	a1,zero,.M.1
+	move	t2,zero		# (delay slot) cy = 0
+	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
+	dsrl32	a2,a2,0		# This clears the upper 32 bits.
+ #    a0 = a[0];
+	lwu	a4,0(a0)
+ #    w0 = ((mp_word)b * a0);
+	dmultu	a2,a4
+ #    if (--a_len) {
+	addiu	a1,a1,-1
+	beq	a1,zero,.M.2
+ #      while (a_len >= 2) {
+	sltiu	t3,a1,2		# (delay slot) t3 = (a_len < 2)
+	bne	t3,zero,.M.3
+	nop			# (delay slot)
+.M.4:
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# in bounds: a_len >= 2 here
+ #	  a_len -= 2;
+	addiu	a1,a1,-2
+ #	  c0     = c[0];
+	lwu	a6,0(a3)
+ #	  w0    += cy;
+	mflo	t0
+	daddu	t0,t0,t2
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  a0     = a[2];
+	lwu	a4,8(a0)
+ #	  a     += 2;
+	addiu	a0,a0,8
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  w0     = (mp_word)b * a0;
+	dmultu	a2,a4
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+	sltiu	t3,a1,2		# t3 = (a_len < 2)
+	beq	t3,zero,.M.4
+ #	  c     += 2;
+	addiu	a3,a3,8		# (delay slot) wanted on both paths
+ #      }
+.M.3:
+ #      c0       = c[0];
+	lwu	a6,0(a3)
+ #      w0      += cy;
+ #      if (a_len) {
+	mflo	t0
+	beq	a1,zero,.M.5
+	daddu	t0,t0,t2	# (delay slot) w0 += cy
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# in bounds: a_len == 1 here
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  c1     = c[1];
+	lwu	a7,4(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  w1    += c1;
+	daddu	t1,t1,a7
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c     += 2;
+	b	.M.6
+	addiu	a3,a3,8		# (delay slot) skip both stored digits
+ #      } else {
+.M.5:
+ #	  w0    += c0;
+	daddu	t0,t0,a6
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c     += 1;
+	b	.M.6
+	addiu	a3,a3,4		# (delay slot)
+ #      }
+ #    } else {
+.M.2:
+ #      c0     = c[0];
+	lwu	a6,0(a3)
+ #      w0    += c0;
+	mflo	t0
+	daddu	t0,t0,a6
+ #      c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #      cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #      c     += 1;
+	addiu	a3,a3,4
+ #    }
+.M.6:
+
+ #    while (cy) {
+	beq	t2,zero,.M.1
+	nop			# (delay slot)
+.M.7:
+ #      mp_word w = (mp_word)*c + cy;
+	lwu	a6,0(a3)
+	daddu	t2,t2,a6
+ #      *c++ = ACCUM(w);
+	sw	t2,0(a3)
+ #      cy = CARRYOUT(w);
+	dsrl32	t2,t2,0
+	bne	t2,zero,.M.7
+	addiu	a3,a3,4		# (delay slot) c++
+
+ #  }
+.M.1:
+	jr	ra
+	nop			# (delay slot)
+ #}
+ #
+        .end    s_mpv_mul_d_add_prop
+
+        .ent    s_mpv_mul_d
+        .globl  s_mpv_mul_d
+
+s_mpv_mul_d: 
+ #/* c = a * b */
+ #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, 
+ #			      mp_digit *c)
+ #
+ # Multiplies each of the a_len 32-bit digits of a by b, writes the
+ # low halves (plus running carry) to c[0..a_len-1], and stores the
+ # final carry in c[a_len].  Prior contents of c are not read.
+ # Returns at once when a_len is 0.
+ #
+ # Inputs:   a0 = a, a1 = a_len, a2 = b, a3 = c
+ # Modifies: a0-a5, t0-t3, HI/LO
+ #
+ # This file is assembled with .set noreorder: the instruction that
+ # follows each branch or jump is its delay slot and always executes.
+ # At least two instructions separate every mflo from the next dmultu
+ # (presumably for the HI/LO hazard of early MIPS III parts).
+ #
+ # Digits of a are loaded only after the length test that proves they
+ # exist, so nothing at or beyond a[a_len] is ever read.  Every a[i]
+ # is loaded before c[i] is stored.
+ # NOTE(review): pointers are advanced with 32-bit addiu, which
+ # presumably ties this code to the n32 ABI - confirm before reuse.
+ #{
+ #  mp_digit   a0, a1;	regs a4, a5
+ #  mp_digit   cy = 0;  reg t2
+ #  mp_word    w0, w1;  regs t0, t1
+ #
+ #  if (a_len) {
+	beq	a1,zero,.N.1
+	move	t2,zero		# (delay slot) cy = 0
+	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
+	dsrl32	a2,a2,0		# This clears the upper 32 bits.
+ #    a0 = a[0];
+	lwu	a4,0(a0)
+ #    w0 = ((mp_word)b * a0);
+	dmultu	a2,a4
+ #    if (--a_len) {
+	addiu	a1,a1,-1
+	beq	a1,zero,.N.2
+ #      while (a_len >= 2) {
+	sltiu	t3,a1,2		# (delay slot) t3 = (a_len < 2)
+	bne	t3,zero,.N.3
+	nop			# (delay slot)
+.N.4:
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# in bounds: a_len >= 2 here
+ #	  a_len -= 2;
+	addiu	a1,a1,-2
+ #	  w0    += cy;
+	mflo	t0
+	daddu	t0,t0,t2
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  a0     = a[2];
+	lwu	a4,8(a0)
+ #	  a     += 2;
+	addiu	a0,a0,8
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  w0     = (mp_word)b * a0;
+	dmultu	a2,a4
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+	sltiu	t3,a1,2		# t3 = (a_len < 2)
+	beq	t3,zero,.N.4
+ #	  c     += 2;
+	addiu	a3,a3,8		# (delay slot) wanted on both paths
+ #      }
+.N.3:
+ #      w0      += cy;
+ #      if (a_len) {
+	mflo	t0
+	beq	a1,zero,.N.5
+	daddu	t0,t0,t2	# (delay slot) w0 += cy
+ #	  a1     = a[1];
+	lwu	a5,4(a0)	# in bounds: a_len == 1 here
+ #	  w1     = (mp_word)b * a1; 
+	dmultu	a2,a5
+ #	  cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  w1    += cy;
+	mflo	t1
+	daddu	t1,t1,t2
+ #	  c[1]   = ACCUM(w1);
+	sw	t1,4(a3)
+ #	  cy     = CARRYOUT(w1);
+	dsrl32	t2,t1,0
+ #	  c     += 1;
+	b	.N.6
+	addiu	a3,a3,4		# (delay slot)
+ #      } else {
+.N.5:
+ #	  c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #	  cy     = CARRYOUT(w0);
+	b	.N.6
+	dsrl32	t2,t0,0		# (delay slot)
+ #      }
+ #    } else {
+.N.2:
+	mflo	t0
+ #      c[0]   = ACCUM(w0);
+	sw	t0,0(a3)
+ #      cy     = CARRYOUT(w0);
+	dsrl32	t2,t0,0
+ #    }
+.N.6:
+ #    c[1] = cy;
+	jr	ra
+	sw	t2,4(a3)	# (delay slot) store the final carry
+ #  }
+.N.1:
+	jr	ra
+	nop			# (delay slot)
+ #}
+ #
+        .end    s_mpv_mul_d
author	nelsonb%netscape.com <devnull@localhost>	2000-08-22 00:55:10 +0000
committer	nelsonb%netscape.com <devnull@localhost>	2000-08-22 00:55:10 +0000
commit	43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2 (patch)
tree	6db835422464f62cbe76adb771e1632f15cbb97b /security/nss/lib/freebl/mpi/mpi_mips.s
parent	8e5db081096ef5ba35c5bea39eaed233f53da43d (diff)
download	nss-hg-43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2.tar.gz