diff options
author | nelsonb%netscape.com <devnull@localhost> | 2000-08-22 00:55:10 +0000 |
---|---|---|
committer | nelsonb%netscape.com <devnull@localhost> | 2000-08-22 00:55:10 +0000 |
commit | 43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2 (patch) | |
tree | 6db835422464f62cbe76adb771e1632f15cbb97b /security/nss/lib/freebl/mpi/mpi_mips.s | |
parent | 8e5db081096ef5ba35c5bea39eaed233f53da43d (diff) | |
download | nss-hg-43ef803f2f6a7d8264c6e97e9f6e05e5e2b3fba2.tar.gz |
MIPS assembler code to optimize inner multiply loops for mips3 CPUs.
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_mips.s')
-rw-r--r-- | security/nss/lib/freebl/mpi/mpi_mips.s | 433 |
1 files changed, 433 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s new file mode 100644 index 000000000..18d4ff870 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_mips.s @@ -0,0 +1,433 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape security libraries. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. 
+ * $Id$
+ */
+#include <regdef.h>
+        .set noreorder # no assembler reordering: the instruction written after each branch or jump below is its delay slot
+        .set noat
+
+        .section .text, 1, 0x00000006, 4, 4
+.text:
+        .section .text
+
+        .ent s_mpv_mul_d_add
+        .globl s_mpv_mul_d_add
+
+s_mpv_mul_d_add:
+        #/* c += a * b: for each i in [0, a_len): w = b*a[i] + c[i] + cy, c[i] = low 32 bits of w, cy = high 32 bits of w. The final cy is stored (not added) into c[a_len]. When a_len is 0 nothing is written. */
+        #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+        # mp_digit *c)
+        #{
+        # mp_digit a0, a1; regs a4, a5
+        # mp_digit c0, c1; regs a6, a7
+        # mp_digit cy = 0; reg t2
+        # mp_word w0, w1; regs t0, t1
+        # Arguments arrive as a0 = a, a1 = a_len, a2 = b, a3 = c. Digits are accessed as 32-bit words (lwu/sw), sums are kept in 64 bits, CARRYOUT is dsrl32 by 0 and ACCUM is the sw of the low word.
+        # if (a_len) {
+        beq a1,zero,.L.1
+        move t2,zero # cy = 0 (delay slot: executes whether or not the branch is taken)
+        dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+        dsrl32 a2,a2,0 # This clears the upper 32 bits.
+        # a0 = a[0];
+        lwu a4,0(a0)
+        # w0 = ((mp_word)b * a0);
+        dmultu a2,a4
+        # if (--a_len) {
+        addiu a1,a1,-1
+        beq a1,zero,.L.2
+        # while (a_len >= 2) {
+        sltiu t3,a1,2 # delay slot of the beq above: t3 = (a_len < 2)
+        bne t3,zero,.L.3
+        # a1 = a[1];
+        lwu a5,4(a0) # delay slot: loaded whether or not the loop is entered (a_len >= 1 here)
+.L.4:
+        # a_len -= 2;
+        addiu a1,a1,-2
+        # c0 = c[0];
+        lwu a6,0(a3)
+        # w0 += cy;
+        mflo t0
+        daddu t0,t0,t2
+        # w0 += c0;
+        daddu t0,t0,a6
+        # w1 = (mp_word)b * a1;
+        dmultu a2,a5 #
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # a0 = a[2];
+        lwu a4,8(a0)
+        # a += 2;
+        addiu a0,a0,8
+        # c1 = c[1];
+        lwu a7,4(a3)
+        # w1 += cy;
+        mflo t1
+        daddu t1,t1,t2
+        # w1 += c1;
+        daddu t1,t1,a7
+        # w0 = (mp_word)b * a0;
+        dmultu a2,a4 #
+        # cy = CARRYOUT(w1);
+        dsrl32 t2,t1,0
+        # c[1] = ACCUM(w1);
+        sw t1,4(a3)
+        # c += 2;
+        addiu a3,a3,8
+        sltiu t3,a1,2 # t3 = (a_len < 2)
+        beq t3,zero,.L.4
+        # a1 = a[1];
+        lwu a5,4(a0) # delay slot: also executes on loop exit. NOTE(review): when a_len is 0 here this load is one digit past the last element multiplied and the value is unused - confirm the buffer behind a is readable there
+        # }
+.L.3:
+        # c0 = c[0];
+        lwu a6,0(a3)
+        # w0 += cy;
+        # if (a_len) {
+        mflo t0
+        beq a1,zero,.L.5
+        daddu t0,t0,t2 # delay slot: w0 += cy happens on both paths
+        # w1 = (mp_word)b * a1;
+        dmultu a2,a5
+        # w0 += c0;
+        daddu t0,t0,a6 #
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # c1 = c[1];
+        lwu a7,4(a3)
+        # w1 += cy;
+        mflo t1
+        daddu t1,t1,t2
+        # w1 += c1;
+        daddu t1,t1,a7
+        # c[1] = ACCUM(w1);
+        sw t1,4(a3)
+        # cy = CARRYOUT(w1);
+        dsrl32 t2,t1,0
+        # c += 1;
+        b .L.6
+        addiu a3,a3,4 # delay slot: performs the c += 1 before .L.6 runs
+        # } else {
+.L.5:
+        # w0 += c0;
+        daddu t0,t0,a6
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # cy = CARRYOUT(w0);
+        b .L.6
+        dsrl32 t2,t0,0 # delay slot: cy is computed before .L.6 runs
+        # }
+        # } else {
+.L.2:
+        # c0 = c[0];
+        lwu a6,0(a3)
+        # w0 += c0;
+        mflo t0
+        daddu t0,t0,a6
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # }
+.L.6:
+        # c[1] = cy;
+        jr ra
+        sw t2,4(a3) # delay slot: the carry digit is stored as the function returns
+        # }
+.L.1:
+        jr ra
+        nop
+        #}
+        #
+        .end s_mpv_mul_d_add
+
+        .ent s_mpv_mul_d_add_prop
+        .globl s_mpv_mul_d_add_prop
+
+s_mpv_mul_d_add_prop:
+        #/* c += a * b, with carry propagation: same multiply-accumulate over a_len digits as s_mpv_mul_d_add, but the final cy is added into c[a_len] and carried into following digits of c until it becomes 0. When a_len is 0 nothing is written. */
+        #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+        # mp_digit *c)
+        #{
+        # mp_digit a0, a1; regs a4, a5
+        # mp_digit c0, c1; regs a6, a7
+        # mp_digit cy = 0; reg t2
+        # mp_word w0, w1; regs t0, t1
+        # Arguments arrive as a0 = a, a1 = a_len, a2 = b, a3 = c. Every path below leaves a3 pointing at c[a_len] when it reaches .M.6.
+        # if (a_len) {
+        beq a1,zero,.M.1
+        move t2,zero # cy = 0 (delay slot: executes whether or not the branch is taken)
+        dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+        dsrl32 a2,a2,0 # This clears the upper 32 bits.
+        # a0 = a[0];
+        lwu a4,0(a0)
+        # w0 = ((mp_word)b * a0);
+        dmultu a2,a4
+        # if (--a_len) {
+        addiu a1,a1,-1
+        beq a1,zero,.M.2
+        # while (a_len >= 2) {
+        sltiu t3,a1,2 # delay slot of the beq above: t3 = (a_len < 2)
+        bne t3,zero,.M.3
+        # a1 = a[1];
+        lwu a5,4(a0) # delay slot: loaded whether or not the loop is entered (a_len >= 1 here)
+.M.4:
+        # a_len -= 2;
+        addiu a1,a1,-2
+        # c0 = c[0];
+        lwu a6,0(a3)
+        # w0 += cy;
+        mflo t0
+        daddu t0,t0,t2
+        # w0 += c0;
+        daddu t0,t0,a6
+        # w1 = (mp_word)b * a1;
+        dmultu a2,a5 #
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # a0 = a[2];
+        lwu a4,8(a0)
+        # a += 2;
+        addiu a0,a0,8
+        # c1 = c[1];
+        lwu a7,4(a3)
+        # w1 += cy;
+        mflo t1
+        daddu t1,t1,t2
+        # w1 += c1;
+        daddu t1,t1,a7
+        # w0 = (mp_word)b * a0;
+        dmultu a2,a4 #
+        # cy = CARRYOUT(w1);
+        dsrl32 t2,t1,0
+        # c[1] = ACCUM(w1);
+        sw t1,4(a3)
+        # c += 2;
+        addiu a3,a3,8
+        sltiu t3,a1,2 # t3 = (a_len < 2)
+        beq t3,zero,.M.4
+        # a1 = a[1];
+        lwu a5,4(a0) # delay slot: also executes on loop exit. NOTE(review): when a_len is 0 here this load is one digit past the last element multiplied and the value is unused - confirm the buffer behind a is readable there
+        # }
+.M.3:
+        # c0 = c[0];
+        lwu a6,0(a3)
+        # w0 += cy;
+        # if (a_len) {
+        mflo t0
+        beq a1,zero,.M.5
+        daddu t0,t0,t2 # delay slot: w0 += cy happens on both paths
+        # w1 = (mp_word)b * a1;
+        dmultu a2,a5
+        # w0 += c0;
+        daddu t0,t0,a6 #
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # c1 = c[1];
+        lwu a7,4(a3)
+        # w1 += cy;
+        mflo t1
+        daddu t1,t1,t2
+        # w1 += c1;
+        daddu t1,t1,a7
+        # c[1] = ACCUM(w1);
+        sw t1,4(a3)
+        # cy = CARRYOUT(w1);
+        dsrl32 t2,t1,0
+        # c += 2; (two digits were just written, so the carry loop at .M.6 starts at the next one)
+        b .M.6
+        addiu a3,a3,8 # delay slot: performs the c += 2 before .M.6 runs
+        # } else {
+.M.5:
+        # w0 += c0;
+        daddu t0,t0,a6
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        b .M.6
+        addiu a3,a3,4 # delay slot: c += 1 before .M.6 runs
+        # }
+        # } else {
+.M.2:
+        # c0 = c[0];
+        lwu a6,0(a3)
+        # w0 += c0;
+        mflo t0
+        daddu t0,t0,a6
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        addiu a3,a3,4 # c += 1, then fall through to .M.6
+        # }
+.M.6:
+
+        # while (cy) { (NOTE(review): no length bound is checked in this loop - it relies on the carry dying out inside the buffer behind c; confirm with callers)
+        beq t2,zero,.M.1
+        nop
+.M.7:
+        # mp_word w = (mp_word)*c + cy;
+        lwu a6,0(a3)
+        daddu t2,t2,a6 # t2 holds w from here until the dsrl32 below turns it back into cy
+        # *c++ = ACCUM(w);
+        sw t2,0(a3)
+        # cy = CARRYOUT(w);
+        dsrl32 t2,t2,0
+        bne t2,zero,.M.7
+        addiu a3,a3,4 # delay slot: the c++ of the store above
+
+        # }
+.M.1:
+        jr ra
+        nop
+        #}
+        #
+        .end s_mpv_mul_d_add_prop
+
+        .ent s_mpv_mul_d
+        .globl s_mpv_mul_d
+
+s_mpv_mul_d:
+        #/* c = a * b: for each i in [0, a_len): w = b*a[i] + cy, c[i] = low 32 bits of w, cy = high 32 bits of w. The final cy is stored into c[a_len]. c is never read. When a_len is 0 nothing is written. */
+        #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
+        # mp_digit *c)
+        #{
+        # mp_digit a0, a1; regs a4, a5
+        # mp_digit cy = 0; reg t2
+        # mp_word w0, w1; regs t0, t1
+        # Arguments arrive as a0 = a, a1 = a_len, a2 = b, a3 = c.
+        # if (a_len) {
+        beq a1,zero,.N.1
+        move t2,zero # cy = 0 (delay slot: executes whether or not the branch is taken)
+        dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+        dsrl32 a2,a2,0 # This clears the upper 32 bits.
+        # a0 = a[0];
+        lwu a4,0(a0)
+        # w0 = ((mp_word)b * a0);
+        dmultu a2,a4
+        # if (--a_len) {
+        addiu a1,a1,-1
+        beq a1,zero,.N.2
+        # while (a_len >= 2) {
+        sltiu t3,a1,2 # delay slot of the beq above: t3 = (a_len < 2)
+        bne t3,zero,.N.3
+        # a1 = a[1];
+        lwu a5,4(a0) # delay slot: loaded whether or not the loop is entered (a_len >= 1 here)
+.N.4:
+        # a_len -= 2;
+        addiu a1,a1,-2
+        # w0 += cy;
+        mflo t0
+        daddu t0,t0,t2
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # w1 = (mp_word)b * a1;
+        dmultu a2,a5
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # a0 = a[2];
+        lwu a4,8(a0)
+        # a += 2;
+        addiu a0,a0,8
+        # w1 += cy;
+        mflo t1
+        daddu t1,t1,t2
+        # cy = CARRYOUT(w1);
+        dsrl32 t2,t1,0
+        # w0 = (mp_word)b * a0;
+        dmultu a2,a4
+        # c[1] = ACCUM(w1);
+        sw t1,4(a3)
+        # c += 2;
+        addiu a3,a3,8
+        sltiu t3,a1,2 # t3 = (a_len < 2)
+        beq t3,zero,.N.4
+        # a1 = a[1];
+        lwu a5,4(a0) # delay slot: also executes on loop exit. NOTE(review): when a_len is 0 here this load is one digit past the last element multiplied and the value is unused - confirm the buffer behind a is readable there
+        # }
+.N.3:
+        # w0 += cy;
+        # if (a_len) {
+        mflo t0
+        beq a1,zero,.N.5
+        daddu t0,t0,t2 # delay slot: w0 += cy happens on both paths
+        # w1 = (mp_word)b * a1;
+        dmultu a2,a5 #
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # w1 += cy;
+        mflo t1
+        daddu t1,t1,t2
+        # c[1] = ACCUM(w1);
+        sw t1,4(a3)
+        # cy = CARRYOUT(w1);
+        dsrl32 t2,t1,0
+        # c += 1;
+        b .N.6
+        addiu a3,a3,4 # delay slot: performs the c += 1 before .N.6 runs
+        # } else {
+.N.5:
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # cy = CARRYOUT(w0);
+        b .N.6
+        dsrl32 t2,t0,0 # delay slot: cy is computed before .N.6 runs
+        # }
+        # } else {
+.N.2:
+        mflo t0
+        # c[0] = ACCUM(w0);
+        sw t0,0(a3)
+        # cy = CARRYOUT(w0);
+        dsrl32 t2,t0,0
+        # }
+.N.6:
+        # c[1] = cy;
+        jr ra
+        sw t2,4(a3) # delay slot: the carry digit is stored as the function returns
+        # }
+.N.1:
+        jr ra
+        nop
+        #}
+        #
+        .end s_mpv_mul_d |