summaryrefslogtreecommitdiff
path: root/security/nss/lib/freebl/mpi/mpi_mips.s
diff options
context:
space:
mode:
author nelsonb%netscape.com <devnull@localhost> 2000-08-22 00:55:10 +0000
committer nelsonb%netscape.com <devnull@localhost> 2000-08-22 00:55:10 +0000
commit 43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2 (patch)
tree 6db835422464f62cbe76adb771e1632f15cbb97b /security/nss/lib/freebl/mpi/mpi_mips.s
parent 8e5db081096ef5ba35c5bea39eaed233f53da43d (diff)
download nss-hg-43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2.tar.gz
MIPS assembler code to optimize inner multiply loops for mips3 CPUs.
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_mips.s')
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_mips.s 433
1 files changed, 433 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s
new file mode 100644
index 000000000..18d4ff870
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_mips.s
@@ -0,0 +1,433 @@
+/*
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ *
+ * The Original Code is the Netscape security libraries.
+ *
+ * The Initial Developer of the Original Code is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 2000 Netscape Communications Corporation. All
+ * Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the
+ * terms of the GNU General Public License Version 2 or later (the
+ * "GPL"), in which case the provisions of the GPL are applicable
+ * instead of those above. If you wish to allow use of your
+ * version of this file only under the terms of the GPL and not to
+ * allow others to use your version of this file under the MPL,
+ * indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by
+ * the GPL. If you do not delete the provisions above, a recipient
+ * may use your version of this file under either the MPL or the
+ * GPL.
+ * $Id$
+ */
+#include <regdef.h>
+ # The symbolic register names used below (zero, ra, a0-a7, t0-t3) are
+ # not defined in this file; presumably they come from regdef.h - confirm.
+ #
+ # noreorder: the assembler leaves instruction order alone and does not
+ # fill branch delay slots.  Every branch and jump in this file is
+ # therefore followed by a hand-placed delay-slot instruction that
+ # executes whether or not the branch is taken.
+ .set noreorder
+ # noat: the assembler may not silently use the assembler temporary
+ # register when expanding instructions.
+ .set noat
+
+ # NOTE(review): the numeric operands on the first .section line look
+ # like an assembler-specific type/flags/alignment specification -
+ # confirm against the target assembler documentation before changing.
+ .section .text, 1, 0x00000006, 4, 4
+.text:
+ .section .text
+
+ .ent s_mpv_mul_d_add
+ .globl s_mpv_mul_d_add
+
+s_mpv_mul_d_add:
+ # Multiply the a_len-digit vector a by the single digit b, add the
+ # product into c[0..a_len-1], and store the final carry into c[a_len].
+ # The word at c[a_len] is overwritten with the carry, not added to.
+ #
+ # Digits are handled as 32-bit words (lwu/sw with a 4-byte stride).
+ # Each partial sum is held in a 64-bit register: sw stores its low
+ # half as the result digit and dsrl32 extracts its high half as the
+ # carry into the next digit.
+ #
+ # In:    a0 = a, a1 = a_len, a2 = b, a3 = c
+ # Out:   c[0..a_len] updated; nothing is read or written when
+ #        a_len is 0.
+ # Uses:  a0-a7, t0-t3 and HI/LO (written by dmultu).  The stack is
+ #        not touched and no other routine is called.
+ #
+ # The code is software pipelined: the dmultu for the next digit is
+ # issued before the previous product has been accumulated and stored.
+ # The file is assembled with .set noreorder, so the instruction that
+ # follows each branch or jump is its delay slot.
+ #
+ #/* c += a * b */
+ #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.L.1
+ move t2,zero # cy = 0 (delay slot, runs on both paths)
+ # Zero-extend b so that dmultu sees an unsigned 32-bit operand.
+ # NOTE(review): presumably b can arrive sign-extended in the 64-bit
+ # argument register under the calling convention in use - confirm.
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.L.2
+ # while (a_len >= 2) {
+ # (the sltiu is the delay slot of the beq above; t3 = a_len < 2)
+ sltiu t3,a1,2
+ bne t3,zero,.L.3
+ # a1 = a[1];
+ # (delay slot of the bne: a[1] is loaded whether or not the loop runs)
+ lwu a5,4(a0)
+.L.4:
+ # Loop invariant on entry: LO holds b * a[0] for the current a,
+ # a5 holds a[1], t2 holds the carry, a_len >= 2.
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.L.4
+ # a1 = a[1];
+ # (delay slot: executed on loop exit as well as on loop back)
+ # NOTE(review): when the loop exits with a_len == 0 this load reads
+ # the word one digit past the last digit of a.  The value is never
+ # used on that path - confirm the over-read is acceptable.
+ lwu a5,4(a0)
+ # }
+.L.3:
+ # Here a_len is 0 or 1 and the product of the current a[0] is
+ # still pending in LO.
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.L.5
+ daddu t0,t0,t2 # delay slot: w0 += cy on both paths
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ # (only one digit, so that the store to 4(a3) at .L.6 lands on the
+ # word after the two digits just written; the addiu is the delay slot)
+ b .L.6
+ addiu a3,a3,4
+ # } else {
+.L.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ # (the dsrl32 is the delay slot of the branch)
+ b .L.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.L.2:
+ # Single-digit case: only a[0] * b is outstanding.
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.L.6:
+ # c[1] = cy;
+ # (the store is the delay slot of the return)
+ jr ra
+ sw t2,4(a3)
+ # }
+.L.1:
+ # a_len == 0: return without touching memory.
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add
+
+ .ent s_mpv_mul_d_add_prop
+ .globl s_mpv_mul_d_add_prop
+
+s_mpv_mul_d_add_prop:
+ # Multiply the a_len-digit vector a by the single digit b and add the
+ # product into c[0..a_len-1].  Unlike the variant that stores the
+ # carry, the final carry is then ADDED into c[a_len], c[a_len+1], ...
+ # one digit at a time until the carry becomes zero.
+ #
+ # Digits are handled as 32-bit words (lwu/sw with a 4-byte stride).
+ # Each partial sum is held in a 64-bit register: sw stores its low
+ # half as the result digit and dsrl32 extracts its high half as the
+ # carry.
+ #
+ # In:    a0 = a, a1 = a_len, a2 = b, a3 = c
+ # Out:   c updated in place; nothing is read or written when
+ #        a_len is 0.
+ # Uses:  a0-a7, t0-t3 and HI/LO (written by dmultu).  The stack is
+ #        not touched and no other routine is called.
+ #
+ # NOTE(review): the propagation loop has no length limit; it stops
+ # only when the carry is zero, so the caller presumably guarantees
+ # that c has enough digits to absorb the carry - confirm.
+ #
+ # The file is assembled with .set noreorder, so the instruction that
+ # follows each branch or jump is its delay slot.
+ #
+ #/* c += a * b */
+ #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.M.1
+ move t2,zero # cy = 0 (delay slot, runs on both paths)
+ # Zero-extend b so that dmultu sees an unsigned 32-bit operand.
+ # NOTE(review): presumably b can arrive sign-extended in the 64-bit
+ # argument register under the calling convention in use - confirm.
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.M.2
+ # while (a_len >= 2) {
+ # (the sltiu is the delay slot of the beq above; t3 = a_len < 2)
+ sltiu t3,a1,2
+ bne t3,zero,.M.3
+ # a1 = a[1];
+ # (delay slot of the bne: a[1] is loaded whether or not the loop runs)
+ lwu a5,4(a0)
+.M.4:
+ # Loop invariant on entry: LO holds b * a[0] for the current a,
+ # a5 holds a[1], t2 holds the carry, a_len >= 2.
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.M.4
+ # a1 = a[1];
+ # (delay slot: executed on loop exit as well as on loop back)
+ # NOTE(review): when the loop exits with a_len == 0 this load reads
+ # the word one digit past the last digit of a.  The value is never
+ # used on that path - confirm the over-read is acceptable.
+ lwu a5,4(a0)
+ # }
+.M.3:
+ # Here a_len is 0 or 1 and the product of the current a[0] is
+ # still pending in LO.
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.M.5
+ daddu t0,t0,t2 # delay slot: w0 += cy on both paths
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 2;
+ # (both digits just written are skipped, so c points at the first
+ # digit that receives the propagated carry; the addiu is the delay
+ # slot of the branch)
+ b .M.6
+ addiu a3,a3,8
+ # } else {
+.M.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c += 1;  (the addiu is the delay slot of the branch)
+ b .M.6
+ addiu a3,a3,4
+ # }
+ # } else {
+.M.2:
+ # Single-digit case: only a[0] * b is outstanding.
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c += 1;
+ addiu a3,a3,4
+ # }
+.M.6:
+
+ # On every path c (a3) now points at the digit just above the last
+ # one written and t2 holds the outstanding carry.
+ # while (cy) {
+ beq t2,zero,.M.1
+ nop
+.M.7:
+ # mp_word w = (mp_word)*c + cy;
+ # (t2 is reused to hold w)
+ lwu a6,0(a3)
+ daddu t2,t2,a6
+ # *c++ = ACCUM(w);
+ sw t2,0(a3)
+ # cy = CARRYOUT(w);
+ dsrl32 t2,t2,0
+ bne t2,zero,.M.7
+ addiu a3,a3,4 # delay slot: the c++ from the store above
+
+ # }
+.M.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add_prop
+
+ .ent s_mpv_mul_d
+ .globl s_mpv_mul_d
+
+s_mpv_mul_d:
+ # Multiply the a_len-digit vector a by the single digit b and store
+ # the product in c[0..a_len-1], with the final carry in c[a_len].
+ # c is only written, never read: its previous contents do not
+ # contribute to the result.
+ #
+ # Digits are handled as 32-bit words (lwu/sw with a 4-byte stride).
+ # Each partial product plus carry is held in a 64-bit register: sw
+ # stores its low half as the result digit and dsrl32 extracts its
+ # high half as the carry into the next digit.
+ #
+ # In:    a0 = a, a1 = a_len, a2 = b, a3 = c
+ # Out:   c[0..a_len] written; nothing is read or written when
+ #        a_len is 0.
+ # Uses:  a0-a5, t0-t3 and HI/LO (written by dmultu).  The stack is
+ #        not touched and no other routine is called.
+ #
+ # The code is software pipelined: the dmultu for the next digit is
+ # issued before the previous product has been stored.
+ # The file is assembled with .set noreorder, so the instruction that
+ # follows each branch or jump is its delay slot.
+ #
+ #/* c = a * b */
+ #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.N.1
+ move t2,zero # cy = 0 (delay slot, runs on both paths)
+ # Zero-extend b so that dmultu sees an unsigned 32-bit operand.
+ # NOTE(review): presumably b can arrive sign-extended in the 64-bit
+ # argument register under the calling convention in use - confirm.
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.N.2
+ # while (a_len >= 2) {
+ # (the sltiu is the delay slot of the beq above; t3 = a_len < 2)
+ sltiu t3,a1,2
+ bne t3,zero,.N.3
+ # a1 = a[1];
+ # (delay slot of the bne: a[1] is loaded whether or not the loop runs)
+ lwu a5,4(a0)
+.N.4:
+ # Loop invariant on entry: LO holds b * a[0] for the current a,
+ # a5 holds a[1], t2 holds the carry, a_len >= 2.
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.N.4
+ # a1 = a[1];
+ # (delay slot: executed on loop exit as well as on loop back)
+ # NOTE(review): when the loop exits with a_len == 0 this load reads
+ # the word one digit past the last digit of a.  The value is never
+ # used on that path - confirm the over-read is acceptable.
+ lwu a5,4(a0)
+ # }
+.N.3:
+ # Here a_len is 0 or 1 and the product of the current a[0] is
+ # still pending in LO.
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.N.5
+ daddu t0,t0,t2 # delay slot: w0 += cy on both paths
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ # (only one digit, so that the store to 4(a3) at .N.6 lands on the
+ # word after the two digits just written; the addiu is the delay slot)
+ b .N.6
+ addiu a3,a3,4
+ # } else {
+.N.5:
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ # (the dsrl32 is the delay slot of the branch)
+ b .N.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.N.2:
+ # Single-digit case: only a[0] * b is outstanding.
+ mflo t0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.N.6:
+ # c[1] = cy;
+ # (the store is the delay slot of the return)
+ jr ra
+ sw t2,4(a3)
+ # }
+.N.1:
+ # a_len == 0: return without touching memory.
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d