author     tege <tege@gmplib.org>  2000-02-15 13:24:20 +0100
committer  tege <tege@gmplib.org>  2000-02-15 13:24:20 +0100
commit     f6149307d2ecf6de15bd2ed219eef6c3ef7f1f4a (patch)
tree       1f06701c518c03f718cbfb70f6ae72fdd12e286f
parent     11bbd14af13c6a251d675f56e8fd654b0f52a8d5 (diff)
download   gmp-f6149307d2ecf6de15bd2ed219eef6c3ef7f1f4a.tar.gz
Renamed from `.s'/`.S'.
-rw-r--r--  mpn/sparc32/add_n.asm                 236
-rw-r--r--  mpn/sparc32/addmul_1.asm              146
-rw-r--r--  mpn/sparc32/lshift.asm                 96
-rw-r--r--  mpn/sparc32/mul_1.asm                 199
-rw-r--r--  mpn/sparc32/rshift.asm                 93
-rw-r--r--  mpn/sparc32/sub_n.asm                 326
-rw-r--r--  mpn/sparc32/submul_1.asm              146
-rw-r--r--  mpn/sparc32/udiv_fp.asm               142
-rw-r--r--  mpn/sparc32/udiv_nfp.asm              193
-rw-r--r--  mpn/sparc32/umul.asm                   67
-rw-r--r--  mpn/sparc32/v8/addmul_1.asm           122
-rw-r--r--  mpn/sparc32/v8/mul_1.asm              103
-rw-r--r--  mpn/sparc32/v8/submul_1.asm            58
-rw-r--r--  mpn/sparc32/v8/supersparc/udiv.asm    106
-rw-r--r--  mpn/sparc32/v8/umul.asm                 9
-rw-r--r--  mpn/sparc32/v9/addmul_1.asm           282
-rw-r--r--  mpn/sparc32/v9/mul_1.asm              261
-rw-r--r--  mpn/sparc32/v9/submul_1.asm           285
-rw-r--r--  mpn/sparc64/add_n.asm                 172
-rw-r--r--  mpn/sparc64/addmul_1.asm               86
-rw-r--r--  mpn/sparc64/copyi.asm                  77
-rw-r--r--  mpn/sparc64/lshift.asm                 97
-rw-r--r--  mpn/sparc64/mul_1.asm                  81
-rw-r--r--  mpn/sparc64/rshift.asm                 94
-rw-r--r--  mpn/sparc64/sub_n.asm                 172
-rw-r--r--  mpn/sparc64/submul_1.asm               86
26 files changed, 3735 insertions, 0 deletions
diff --git a/mpn/sparc32/add_n.asm b/mpn/sparc32/add_n.asm
new file mode 100644
index 000000000..7bbe37668
--- /dev/null
+++ b/mpn/sparc32/add_n.asm
@@ -0,0 +1,236 @@
+! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(size,%o3)
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) ! branch if alignment differs
+ nop
+! ** V1a **
+L(0): andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
+ be L(v1) ! if no, branch
+ nop
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 ! save cy in register
+ cmp size,2 ! if size < 2 ...
+ bl L(end2) ! ... branch to tail code
+ subcc %g0,%o4,%g0 ! restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc size,-10,size
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+L(loop1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 ! restore cy
+
+L(fin1):
+ addcc size,8-2,size
+ blt L(end1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+L(loope1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 ! restore cy
+L(end1):
+ addxcc %g4,%g2,%o4
+ addxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+
+ andcc size,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+! ** V1b **
+ mov s2_ptr,%g1
+ mov s1_ptr,s2_ptr
+ b L(0)
+ mov %g1,s1_ptr
+
+! ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr is the same. */
+
+L(2): cmp size,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
+ be L(v2) ! if no, branch
+ nop
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ blt L(fin2)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 ! restore cy
+
+L(fin2):
+ addcc size,8-2,size
+ blt L(end2)
+ subcc %g0,%o4,%g0 ! restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 ! restore cy
+L(end2):
+ andcc size,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+EPILOGUE(mpn_add_n)
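
For reference, the carry-propagating operation that all the alignment variants above implement is the plain word-serial loop below. This is a minimal C sketch of the contract only (hypothetical name ref_add_n, limb type fixed at 32 bits), not GMP's actual generic code:

    #include <stdint.h>

    /* res[0..size) = s1[0..size) + s2[0..size); returns the carry out of
       the most significant limb, matching the final retl/addx pair above.  */
    uint32_t
    ref_add_n (uint32_t *res_ptr, const uint32_t *s1_ptr,
               const uint32_t *s2_ptr, long size)
    {
      uint32_t cy = 0;
      for (long i = 0; i < size; i++)
        {
          uint32_t a = s1_ptr[i], b = s2_ptr[i];
          uint32_t sum = a + b + cy;
          cy = cy ? (sum <= a) : (sum < a);  /* carry iff exact sum >= 2^32 */
          res_ptr[i] = sum;
        }
      return cy;
    }
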
diff --git a/mpn/sparc32/addmul_1.asm b/mpn/sparc32/addmul_1.asm
new file mode 100644
index 000000000..e8c2eaefa
--- /dev/null
+++ b/mpn/sparc32/addmul_1.asm
@@ -0,0 +1,146 @@
+! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ addcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ addcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_addmul_1)
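
A note on the mulscc chains above: mulscc multiplies the (unsigned) Y register by an operand that the hardware treats as signed, so when that operand has its top bit set the other factor must be added back into the high word; that is what the %g2 mask-and-add does. The cmp %o3,0xfff test picks the 12-step chain when the invariant limb fits in 12 bits. A hedged C model of the sign compensation (hypothetical name, for exposition only):

    #include <stdint.h>

    static uint64_t
    umul32_via_signed (uint32_t a, uint32_t b)
    {
      /* What the mulscc chain forms: a signed-by-unsigned product.  */
      int64_t sprod = (int64_t) (int32_t) b * (int64_t) a;
      uint64_t prod = (uint64_t) sprod;
      if ((int32_t) b < 0)
        prod += (uint64_t) a << 32;  /* the %g2 compensation, high word */
      return prod;                   /* == (uint64_t) a * b */
    }
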
diff --git a/mpn/sparc32/lshift.asm b/mpn/sparc32/lshift.asm
new file mode 100644
index 000000000..de7d38354
--- /dev/null
+++ b/mpn/sparc32/lshift.asm
@@ -0,0 +1,96 @@
+! sparc __mpn_lshift -- Shift a limb vector left and store the result in a second limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ sll %o2,2,%g1
+ add %o1,%g1,%o1 ! make %o1 point at end of src
+ ld [%o1-4],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o0,%g1,%o0 ! make %o0 point at end of res
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 ! number of limbs in first loop
+ srl %g2,%o5,%g1 ! compute function result
+ be L(0) ! if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ld [%o1-8],%g3
+ add %o0,-4,%o0
+ add %o1,-4,%o1
+ addcc %g4,-1,%g4
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0+0]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1-8],%g3
+ add %o0,-16,%o0
+ addcc %o2,-4,%o2
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+
+ ld [%o1-12],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+12]
+ srl %g2,%o5,%g1
+
+ ld [%o1-16],%g3
+ sll %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0+8]
+ srl %g3,%o5,%g1
+
+ ld [%o1-20],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+4]
+ srl %g2,%o5,%g1
+
+ add %o1,-16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0+0]
+
+L(end): sll %g2,%o3,%g2
+ st %g2,[%o0-4]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_lshift)
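
For reference: mpn_lshift walks from the most significant limb down (so the destination may overlap the source from above) and returns the bits shifted out of the top limb, which the code parks at [%sp+80] until the final reload. A minimal C sketch of that contract, with a hypothetical name, 32-bit limbs, and 1 <= cnt < 32 assumed:

    #include <stdint.h>

    uint32_t
    ref_lshift (uint32_t *res, const uint32_t *src, long size, unsigned cnt)
    {
      unsigned tnc = 32 - cnt;              /* the negated count kept in %o5 */
      uint32_t retval = src[size - 1] >> tnc;
      for (long i = size - 1; i > 0; i--)
        res[i] = (src[i] << cnt) | (src[i - 1] >> tnc);
      res[0] = src[0] << cnt;
      return retval;
    }
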
diff --git a/mpn/sparc32/mul_1.asm b/mpn/sparc32/mul_1.asm
new file mode 100644
index 000000000..fb944a9d6
--- /dev/null
+++ b/mpn/sparc32/mul_1.asm
@@ -0,0 +1,199 @@
+! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne,a L(loop0)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g1,[%o4+%o2]
+
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne,a L(loop)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_mul_1)
+
+! ADD CODE FOR SMALL MULTIPLIERS!
+!1: ld
+! st
+!
+!2: ld ,a
+! addxcc a,a,x
+! st x,
+!
+!3_unrolled:
+! ld ,a
+! addxcc a,a,x1 ! 2a + cy
+! addx %g0,%g0,x2
+! addcc a,x1,x ! 3a + c
+! st x,
+!
+! ld ,a
+! addxcc a,a,y1
+! addx %g0,%g0,y2
+! addcc a,y1,x
+! st x,
+!
+!4_unrolled:
+! ld ,a
+! srl a,2,x1 ! 4a
+! addxcc y2,x1,x
+! sll a,30,x2
+! st x,
+!
+! ld ,a
+! srl a,2,y1
+! addxcc x2,y1,y
+! sll a,30,y2
+! st x,
+!
+!5_unrolled:
+! ld ,a
+! srl a,2,x1 ! 4a
+! addxcc a,x1,x ! 5a + c
+! sll a,30,x2
+! addx %g0,x2,x2
+! st x,
+!
+! ld ,a
+! srl a,2,y1
+! addxcc a,y1,x
+! sll a,30,y2
+! addx %g0,y2,y2
+! st x,
+!
+!8_unrolled:
+! ld ,a
+! srl a,3,x1 ! 8a
+! addxcc y2,x1,x
+! sll a,29,x2
+! st x,
+!
+! ld ,a
+! srl a,3,y1
+! addxcc x2,y1,y
+! sll a,29,y2
+! st x,
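
The commented-out notes above sketch shift-and-add replacements for small invariant multipliers; any such variant has to satisfy the same product-and-carry recurrence, shown here as a C sketch (hypothetical name, fixed 32-bit limbs, not GMP's generic code):

    #include <stdint.h>

    uint32_t
    ref_mul_1 (uint32_t *res, const uint32_t *s1, long size, uint32_t limb)
    {
      uint32_t cy = 0;                 /* cy_limb, the eventual return value */
      for (long i = 0; i < size; i++)
        {
          uint64_t p = (uint64_t) s1[i] * limb + cy;
          res[i] = (uint32_t) p;       /* low word to res_ptr */
          cy = (uint32_t) (p >> 32);   /* high word becomes the next carry */
        }
      return cy;
    }
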
diff --git a/mpn/sparc32/rshift.asm b/mpn/sparc32/rshift.asm
new file mode 100644
index 000000000..4489d964b
--- /dev/null
+++ b/mpn/sparc32/rshift.asm
@@ -0,0 +1,93 @@
+! sparc __mpn_rshift -- Shift a limb vector right and store the result in a second limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ld [%o1],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 ! number of limbs in first loop
+ sll %g2,%o5,%g1 ! compute function result
+ be L(0) ! if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ld [%o1+4],%g3
+ add %o0,4,%o0
+ add %o1,4,%o1
+ addcc %g4,-1,%g4
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0-4]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1+4],%g3
+ add %o0,16,%o0
+ addcc %o2,-4,%o2
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+
+ ld [%o1+8],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-16]
+ sll %g2,%o5,%g1
+
+ ld [%o1+12],%g3
+ srl %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0-12]
+ sll %g3,%o5,%g1
+
+ ld [%o1+16],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-8]
+ sll %g2,%o5,%g1
+
+ add %o1,16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0-4]
+
+L(end): srl %g2,%o3,%g2
+ st %g2,[%o0-0]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_rshift)
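
Mirror image of the lshift contract: mpn_rshift walks from the least significant limb up (destination may overlap the source from below) and returns the bits shifted out of the low limb, left-justified in the result limb. A minimal C sketch under the same assumptions (hypothetical name, 1 <= cnt < 32):

    #include <stdint.h>

    uint32_t
    ref_rshift (uint32_t *res, const uint32_t *src, long size, unsigned cnt)
    {
      unsigned tnc = 32 - cnt;
      uint32_t retval = src[0] << tnc;   /* the value parked at [%sp+80] */
      for (long i = 0; i < size - 1; i++)
        res[i] = (src[i] >> cnt) | (src[i + 1] << tnc);
      res[size - 1] = src[size - 1] >> cnt;
      return retval;
    }
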
diff --git a/mpn/sparc32/sub_n.asm b/mpn/sparc32/sub_n.asm
new file mode 100644
index 000000000..5cb982fbb
--- /dev/null
+++ b/mpn/sparc32/sub_n.asm
@@ -0,0 +1,326 @@
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(size,%o3)
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) ! branch if alignment differs
+ nop
+! ** V1a **
+ andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
+ be L(v1) ! if no, branch
+ nop
+/* Subtract least significant limb separately to align res_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 ! save cy in register
+ cmp size,2 ! if size < 2 ...
+ bl L(end2) ! ... branch to tail code
+ subcc %g0,%o4,%g0 ! restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc size,-10,size
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+L(loop1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 ! restore cy
+
+L(fin1):
+ addcc size,8-2,size
+ blt L(end1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+L(loope1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 ! restore cy
+L(end1):
+ subxcc %g4,%g2,%o4
+ subxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+
+ andcc size,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract last limb */
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+! ** V1b **
+ andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
+ be L(v1b) ! if no, branch
+ nop
+/* Subtract least significant limb separately to align res_ptr and s1_ptr */
+ ld [s2_ptr],%g4
+ add s2_ptr,4,s2_ptr
+ ld [s1_ptr],%g2
+ add s1_ptr,4,s1_ptr
+ add size,-1,size
+ subcc %g2,%g4,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1b): addx %g0,%g0,%o4 ! save cy in register
+ cmp size,2 ! if size < 2 ...
+ bl L(end2) ! ... branch to tail code
+ subcc %g0,%o4,%g0 ! restore cy
+
+ ld [s2_ptr+0],%g4
+ addcc size,-10,size
+ ld [s2_ptr+4],%g1
+ ldd [s1_ptr+0],%g2
+ blt L(fin1b)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+L(loop1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+16],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+20],%g1
+ ldd [s1_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+24],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+28],%g1
+ ldd [s1_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+32],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+36],%g1
+ ldd [s1_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1b)
+ subcc %g0,%o4,%g0 ! restore cy
+
+L(fin1b):
+ addcc size,8-2,size
+ blt L(end1b)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+L(loope1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1b)
+ subcc %g0,%o4,%g0 ! restore cy
+L(end1b):
+ subxcc %g2,%g4,%o4
+ subxcc %g3,%g1,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 ! save cy in register
+
+ andcc size,1,%g0
+ be L(ret1b)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract last limb */
+ ld [s2_ptr+8],%g4
+ ld [s1_ptr+8],%g2
+ subxcc %g2,%g4,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1b):
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+! ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr is the same. */
+
+L(2): cmp size,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
+ be L(v2) ! if no, branch
+ nop
+/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add size,-1,size
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ blt L(fin2)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-8,size
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 ! restore cy
+
+L(fin2):
+ addcc size,8-2,size
+ blt L(end2)
+ subcc %g0,%o4,%g0 ! restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc size,-2,size
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 ! restore cy
+L(end2):
+ andcc size,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Subtract last limb */
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+EPILOGUE(mpn_sub_n)
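
The borrow bookkeeping above uses the same idiom as add_n: addx %g0,%g0,%o4 captures the flag into a register before the flag-clobbering counter updates, and subcc %g0,%o4,%g0 regenerates it. The contract itself is the word-serial subtraction below (a C sketch, hypothetical name, 32-bit limbs):

    #include <stdint.h>

    /* res[0..size) = s1[0..size) - s2[0..size); returns the borrow out of
       the most significant limb.  */
    uint32_t
    ref_sub_n (uint32_t *res_ptr, const uint32_t *s1_ptr,
               const uint32_t *s2_ptr, long size)
    {
      uint32_t brw = 0;
      for (long i = 0; i < size; i++)
        {
          uint32_t a = s1_ptr[i], b = s2_ptr[i];
          res_ptr[i] = a - b - brw;
          brw = brw ? (a <= b) : (a < b);  /* borrow iff a < b + brw exactly */
        }
      return brw;
    }
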
diff --git a/mpn/sparc32/submul_1.asm b/mpn/sparc32/submul_1.asm
new file mode 100644
index 000000000..9221812ca
--- /dev/null
+++ b/mpn/sparc32/submul_1.asm
@@ -0,0 +1,146 @@
+! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ ! Make S1_PTR and RES_PTR point at the end of their blocks
+ ! and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ subcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 ! loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ subcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc32/udiv_fp.asm b/mpn/sparc32/udiv_fp.asm
new file mode 100644
index 000000000..682a4be51
--- /dev/null
+++ b/mpn/sparc32/udiv_fp.asm
@@ -0,0 +1,142 @@
+! SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+! This is for v7 CPUs with a floating-point unit.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr i0
+! n1 i1
+! n0 i2
+! d i3
+
+include(`../config.m4')
+
+ASM_START()
+ RODATA
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(__udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+ sethi %hi(L(C0)),%o7
+ fitod %f10,%f4
+ ldd [%o7+%lo(L(C0))],%f8
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+ sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ wr %g0,%i4,%y
+ sra %i3,31,%g2
+ and %i4,%g2,%g2
+ andcc %g0,0,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,0,%g1
+ add %g1,%g2,%i0
+ rd %y,%g3
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(__udiv_qrnnd)
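
The algorithm above: convert n1, n0 and d to doubles (with add-backs for values that look negative as 32-bit ints), form the quotient estimate with one fdivd, convert back through the 2^31 bias dance around L(251), then verify with an integer multiply and adjust by one if needed. A hedged C model of that flow (hypothetical name; assumes n1 < d so the quotient fits in 32 bits):

    #include <stdint.h>

    static uint32_t
    ref_udiv_qrnnd_fp (uint32_t *rem, uint32_t n1, uint32_t n0, uint32_t d)
    {
      double num = (double) n1 * 4294967296.0 + (double) n0;
      uint32_t q = (uint32_t) (num / (double) d);     /* quotient estimate */
      uint64_t r = (((uint64_t) n1 << 32) | n0) - (uint64_t) q * d;
      if ((int64_t) r < 0)          /* estimate was one too high */
        { q--; r += d; }
      else if (r >= d)              /* estimate was one too low */
        { q++; r -= d; }
      *rem = (uint32_t) r;
      return q;
    }
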
diff --git a/mpn/sparc32/udiv_nfp.asm b/mpn/sparc32/udiv_nfp.asm
new file mode 100644
index 000000000..bdb868d9e
--- /dev/null
+++ b/mpn/sparc32/udiv_nfp.asm
@@ -0,0 +1,193 @@
+! SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+! This is for v7 CPUs without a floating-point unit.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr o0
+! n1 o1
+! n0 o2
+! d o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(__udiv_qrnnd)
+ tst %o3
+ bneg L(largedivisor)
+ mov 8,%g1
+
+ b L(p1)
+ addxcc %o2,%o2,%o2
+
+L(plop):
+ bcc L(n1)
+ addxcc %o2,%o2,%o2
+L(p1): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n2)
+ addxcc %o2,%o2,%o2
+L(p2): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n3)
+ addxcc %o2,%o2,%o2
+L(p3): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n4)
+ addxcc %o2,%o2,%o2
+L(p4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(plop)
+ subcc %o1,%o3,%o4
+ bcc L(n5)
+ addxcc %o2,%o2,%o2
+L(p5): st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(nlop):
+ bcc L(p1)
+ addxcc %o2,%o2,%o2
+L(n1): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p2)
+ addxcc %o2,%o2,%o2
+L(n2): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p3)
+ addxcc %o2,%o2,%o2
+L(n3): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p4)
+ addxcc %o2,%o2,%o2
+L(n4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(nlop)
+ subcc %o4,%o3,%o1
+ bcc L(p5)
+ addxcc %o2,%o2,%o2
+L(n5): st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(largedivisor):
+ and %o2,1,%o5 ! %o5 = n0 & 1
+
+ srl %o2,1,%o2
+ sll %o1,31,%g2
+ or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1)
+ srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1)
+
+ and %o3,1,%g2
+ srl %o3,1,%g3 ! %g3 = floor(d / 2)
+ add %g3,%g2,%g3 ! %g3 = ceil(d / 2)
+
+ b L(Lp1)
+ addxcc %o2,%o2,%o2
+
+L(Lplop):
+ bcc L(Ln1)
+ addxcc %o2,%o2,%o2
+L(Lp1): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln2)
+ addxcc %o2,%o2,%o2
+L(Lp2): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln3)
+ addxcc %o2,%o2,%o2
+L(Lp3): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln4)
+ addxcc %o2,%o2,%o2
+L(Lp4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(Lplop)
+ subcc %o1,%g3,%o4
+ bcc L(Ln5)
+ addxcc %o2,%o2,%o2
+L(Lp5): add %o1,%o1,%o1 ! << 1
+ tst %g2
+ bne L(oddp)
+ add %o5,%o1,%o1
+ st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(Lnlop):
+ bcc L(Lp1)
+ addxcc %o2,%o2,%o2
+L(Ln1): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp2)
+ addxcc %o2,%o2,%o2
+L(Ln2): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp3)
+ addxcc %o2,%o2,%o2
+L(Ln3): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp4)
+ addxcc %o2,%o2,%o2
+L(Ln4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(Lnlop)
+ subcc %o4,%g3,%o1
+ bcc L(Lp5)
+ addxcc %o2,%o2,%o2
+L(Ln5): add %o4,%o4,%o4 ! << 1
+ tst %g2
+ bne L(oddn)
+ add %o5,%o4,%o4
+ st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(oddp):
+ xnor %g0,%o2,%o2
+ ! q' in %o2. r' in %o1
+ addcc %o1,%o2,%o1
+ bcc L(Lp6)
+ addx %o2,0,%o2
+ sub %o1,%o3,%o1
+L(Lp6): subcc %o1,%o3,%g0
+ bcs L(Lp7)
+ subx %o2,-1,%o2
+ sub %o1,%o3,%o1
+L(Lp7): st %o1,[%o0]
+ retl
+ mov %o2,%o0
+
+L(oddn):
+ xnor %g0,%o2,%o2
+ ! q' in %o2. r' in %o4
+ addcc %o4,%o2,%o4
+ bcc L(Ln6)
+ addx %o2,0,%o2
+ sub %o4,%o3,%o4
+L(Ln6): subcc %o4,%o3,%g0
+ bcs L(Ln7)
+ subx %o2,-1,%o2
+ sub %o4,%o3,%o4
+L(Ln7): st %o4,[%o0]
+ retl
+ mov %o2,%o0
+EPILOGUE(__udiv_qrnnd)
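
The ladder of L(p*)/L(n*) labels above is a non-restoring shift-subtract divider: it keeps either the true or the complemented partial remainder so each step costs one subcc plus one addxcc, the mov 8,%g1 counter runs eight unrolled four-bit rounds (32 steps), and the final xnor recovers the quotient. The largedivisor path halves n and d first and patches the result at L(oddp)/L(oddn). A plain restoring C model of the 32-step core (a sketch with a hypothetical name; assumes n1 < d):

    #include <stdint.h>

    static uint32_t
    ref_udiv_qrnnd_shift_sub (uint32_t *rem, uint32_t n1, uint32_t n0,
                              uint32_t d)
    {
      uint32_t q = 0;
      for (int i = 0; i < 32; i++)
        {
          uint32_t msb = n1 >> 31;       /* bit shifted out of the top */
          n1 = (n1 << 1) | (n0 >> 31);   /* shift n1:n0 left by one */
          n0 <<= 1;
          q <<= 1;
          if (msb | (n1 >= d))           /* partial remainder >= d? */
            { n1 -= d; q |= 1; }
        }
      *rem = n1;
      return q;
    }
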
diff --git a/mpn/sparc32/umul.asm b/mpn/sparc32/umul.asm
new file mode 100644
index 000000000..ab3e8e699
--- /dev/null
+++ b/mpn/sparc32/umul.asm
@@ -0,0 +1,67 @@
+! SPARC __umul_ppmm -- support for longlong.h for non-gcc.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(__umul_ppmm)
+ wr %g0,%o1,%y
+ sra %o2,31,%g2 ! Don't move this insn
+ and %o1,%g2,%g2 ! Don't move this insn
+ andcc %g0,0,%g1 ! Don't move this insn
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,0,%g1
+ rd %y,%g3
+ st %g3,[%o0]
+ retl
+ add %g1,%g2,%o0
+EPILOGUE(__umul_ppmm)
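
Interface note for the routine above, as a C sketch: the low word of the product (read from the Y register) is stored through the pointer in %o0, and the high word (with the same sign compensation as in addmul_1) is the return value. Hypothetical model name:

    #include <stdint.h>

    static uint32_t
    ref_umul_ppmm (uint32_t *lowptr, uint32_t u, uint32_t v)
    {
      uint64_t p = (uint64_t) u * v;
      *lowptr = (uint32_t) p;        /* the st %g3,[%o0] of the Y register */
      return (uint32_t) (p >> 32);   /* returned in %o0 */
    }
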
diff --git a/mpn/sparc32/v8/addmul_1.asm b/mpn/sparc32/v8/addmul_1.asm
new file mode 100644
index 000000000..11b4bada0
--- /dev/null
+++ b/mpn/sparc32/v8/addmul_1.asm
@@ -0,0 +1,122 @@
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ orcc %g0,%g0,%g2
+ ld [%o1+0],%o4 ! 1
+
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+#if PIC
+ mov %o7,%g4 ! Save return address register
+ call 1f
+ add %o7,L(1)-1f,%g3
+1: mov %g4,%o7 ! Restore return address register
+#else
+ sethi %hi(L(1)),%g3
+ or %g3,%lo(L(1)),%g3
+#endif
+ jmp %g3+%g1
+ nop
+L(1):
+L(L00): add %o0,-4,%o0
+ b L(loop00) /* 4, 8, 12, ... */
+ add %o1,-4,%o1
+ nop
+L(L01): b L(loop01) /* 1, 5, 9, ... */
+ nop
+ nop
+ nop
+L(L10): add %o0,-12,%o0 /* 2, 6, 10, ... */
+ b L(loop10)
+ add %o1,4,%o1
+ nop
+L(L11): add %o0,-8,%o0 /* 3, 7, 11, ... */
+ b L(loop11)
+ add %o1,-8,%o1
+ nop
+
+L(loop):
+ addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ rd %y,%g2 ! 1
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 1
+L(loop00):
+ umul %o4,%o3,%g3 ! 2
+ ld [%o0+4],%g1 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ rd %y,%g2 ! 2
+ addx %g0,%g2,%g2
+ nop
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+4] ! 2
+L(loop11):
+ umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ rd %y,%g2 ! 3
+ add %o1,16,%o1
+ addx %g0,%g2,%g2
+ ld [%o0+8],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+8] ! 3
+L(loop10):
+ umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+12],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ addx %g0,%g2,%g2
+L(loop01):
+ addcc %o2,-4,%o2
+ bg L(loop)
+ umul %o4,%o3,%g3 ! 1
+
+ addcc %g3,%g2,%g3 ! 4
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 ! 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] ! 4
+ addx %g0,%g2,%o0
+
+ retl
+ nop
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc32/v8/mul_1.asm b/mpn/sparc32/v8/mul_1.asm
new file mode 100644
index 000000000..020e06303
--- /dev/null
+++ b/mpn/sparc32/v8/mul_1.asm
@@ -0,0 +1,103 @@
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+#if PIC
+ mov %o7,%g4 ! Save return address register
+ call 1f
+	add	%o7,L(1)-1f,%g3
+1: mov %g4,%o7 ! Restore return address register
+#else
+ sethi %hi(L(1)),%g3
+ or %g3,%lo(L(1)),%g3
+#endif
+ jmp %g3+%g1
+ ld [%o1+0],%o4 ! 1
+L(1):
+L(L00): add %o0,-4,%o0
+ add %o1,-4,%o1
+ b L(loop00) /* 4, 8, 12, ... */
+ orcc %g0,%g0,%g2
+L(L01): b L(loop01) /* 1, 5, 9, ... */
+ orcc %g0,%g0,%g2
+ nop
+ nop
+L(L10): add %o0,-12,%o0 /* 2, 6, 10, ... */
+ add %o1,4,%o1
+ b L(loop10)
+ orcc %g0,%g0,%g2
+ nop
+L(L11): add %o0,-8,%o0 /* 3, 7, 11, ... */
+ add %o1,-8,%o1
+ b L(loop11)
+ orcc %g0,%g0,%g2
+
+L(loop):
+ addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ st %g3,[%o0+0] ! 1
+ rd %y,%g2 ! 1
+L(loop00):
+ umul %o4,%o3,%g3 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ st %g3,[%o0+4] ! 2
+ rd %y,%g2 ! 2
+L(loop11):
+ umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] ! 3
+ rd %y,%g2 ! 3
+L(loop10):
+ umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+L(loop01):
+ addcc %o2,-4,%o2
+ bg L(loop)
+ umul %o4,%o3,%g3 ! 1
+
+ addcc %g3,%g2,%g3 ! 4
+ st %g3,[%o0+0] ! 4
+ rd %y,%g2 ! 4
+
+ retl
+ addx %g0,%g2,%o0
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc32/v8/submul_1.asm b/mpn/sparc32/v8/submul_1.asm
new file mode 100644
index 000000000..467589d5c
--- /dev/null
+++ b/mpn/sparc32/v8/submul_1.asm
@@ -0,0 +1,58 @@
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ sub %g0,%o2,%o2 ! negate ...
+ sll %o2,2,%o2 ! ... and scale size
+ sub %o1,%o2,%o1 ! o1 is offset s1_ptr
+ sub %o0,%o2,%g1 ! g1 is offset res_ptr
+
+ mov 0,%o0 ! clear cy_limb
+
+L(loop):
+ ld [%o1+%o2],%o4
+ ld [%g1+%o2],%g2
+ umul %o4,%o3,%o5
+ rd %y,%g3
+ addcc %o5,%o0,%o5
+ addx %g3,0,%o0
+ subcc %g2,%o5,%g2
+ addx %o0,0,%o0
+ st %g2,[%g1+%o2]
+
+ addcc %o2,4,%o2
+ bne L(loop)
+ nop
+
+ retl
+ nop
+EPILOGUE(mpn_submul_1)
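
With v8's hardware umul and rd %y supplying the full 64-bit product, the loop above reduces to the recurrence below (C sketch, hypothetical name, 32-bit limbs):

    #include <stdint.h>

    static uint32_t
    ref_submul_1 (uint32_t *res, const uint32_t *s1, long size, uint32_t limb)
    {
      uint32_t cy = 0;
      for (long i = 0; i < size; i++)
        {
          uint64_t p = (uint64_t) s1[i] * limb + cy;  /* umul + carry fold */
          uint32_t lo = (uint32_t) p;
          cy = (uint32_t) (p >> 32);
          uint32_t r = res[i];
          res[i] = r - lo;
          cy += (r < lo);                             /* propagate borrow */
        }
      return cy;
    }
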
diff --git a/mpn/sparc32/v8/supersparc/udiv.asm b/mpn/sparc32/v8/supersparc/udiv.asm
new file mode 100644
index 000000000..6089f11f0
--- /dev/null
+++ b/mpn/sparc32/v8/supersparc/udiv.asm
@@ -0,0 +1,106 @@
+! SuperSPARC __udiv_qrnnd division support, used from longlong.h.
+! This is for SuperSPARC only, to compensate for its semi-functional
+! udiv instruction.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr i0
+! n1 i1
+! n0 i2
+! d i3
+
+include(`../config.m4')
+
+ASM_START()
+ RODATA
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(__udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+ sethi %hi(L(C0)),%o7
+ fitod %f10,%f4
+ ldd [%o7+%lo(L(C0))],%f8
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+ sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ umul %i3,%i4,%g3
+ rd %y,%i0
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(__udiv_qrnnd)
diff --git a/mpn/sparc32/v8/umul.asm b/mpn/sparc32/v8/umul.asm
new file mode 100644
index 000000000..411e8bc18
--- /dev/null
+++ b/mpn/sparc32/v8/umul.asm
@@ -0,0 +1,9 @@
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(__umul_ppmm)
+ umul %o1,%o2,%g2
+ st %g2,[%o0]
+ retl
+ rd %y,%o0
+EPILOGUE(__umul_ppmm)
diff --git a/mpn/sparc32/v9/addmul_1.asm b/mpn/sparc32/v9/addmul_1.asm
new file mode 100644
index 000000000..548f5774c
--- /dev/null
+++ b/mpn/sparc32/v9/addmul_1.asm
@@ -0,0 +1,282 @@
+! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1998 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr i0
+! s1_ptr i1
+! size i2
+! s2_limb i3
+
+include(`../config.m4')
+
+ASM_START()
+ RODATA
+ ALIGN(4)
+L(noll):
+ .word 0
+PROLOGUE(mpn_addmul_1)
+ save %sp,-256,%sp
+ sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 ! cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 ! s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pt %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 ! res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+! BEGIN LOOP
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 ! res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 ! res_ptr++
+! END LOOP
+
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ ldx [%fp-32],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+L(yyy): add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ ldx [%fp-32],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+L(ret): add %g5,%g1,%g1 ! add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ st %g4,[%i0-4]
+
+ ret
+	restore %g0,%g3,%o0	! side effect: put cy in return reg
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc32/v9/mul_1.asm b/mpn/sparc32/v9/mul_1.asm
new file mode 100644
index 000000000..373e3ba77
--- /dev/null
+++ b/mpn/sparc32/v9/mul_1.asm
@@ -0,0 +1,261 @@
+! SPARC v9 32-bit __mpn_mul_1 -- Multiply a limb vector with a limb and
+! store the result in a second limb vector.
+
+! Copyright (C) 1998 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr i0
+! s1_ptr i1
+! size i2
+! s2_limb i3
+
+include(`../config.m4')
+
+ASM_START()
+ RODATA
+ ALIGN(4)
+L(noll):
+ .word 0
+PROLOGUE(mpn_mul_1)
+ save %sp,-256,%sp
+ sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
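+! %f10 was loaded with zero, so the pair %f10:%f11 read as a 64-bit integer
+! is exactly the 32-bit word last stored at [%fp-16]; fxtod converts it
+! without rounding.  %f6 = (double) low 16 bits of s2_limb and %f8 =
+! (double) high 16 bits, letting each 32x32 limb product be formed as two
+! exact 48-bit double products, p0 and p16.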
+
+ mov 0,%g3 ! cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 ! s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pt %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 ! res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+! BEGIN LOOP
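+! Same double-buffered pipeline as addmul_1, minus the *res_ptr read: the
+! integer side adds the carry to the previous p16+p0 sum, stores the low 32
+! bits, and keeps the high bits in %g3 as the next carry while the FPU
+! works on the next limb.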
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 ! res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 ! res_ptr++
+! END LOOP
+
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ ldx [%fp-32],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+L(yyy): add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ ldx [%fp-32],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+L(ret): add %g3,%g1,%g4 ! p += cy
+ srlx %g4,32,%g3
+ st %g4,[%i0-4]
+
+ ret
+	restore %g0,%g3,%o0	! side effect: put cy in return reg
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc32/v9/submul_1.asm b/mpn/sparc32/v9/submul_1.asm
new file mode 100644
index 000000000..2c56301e3
--- /dev/null
+++ b/mpn/sparc32/v9/submul_1.asm
@@ -0,0 +1,285 @@
+! SPARC v9 32-bit __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1998 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr i0
+! s1_ptr i1
+! size i2
+! s2_limb i3
+
+include(`../config.m4')
+
+ASM_START()
+ RODATA
+ ALIGN(4)
+L(noll):
+ .word 0
+PROLOGUE(mpn_submul_1)
+ save %sp,-256,%sp
+ sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
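+! As in mul_1/addmul_1: %f10:%f11 is a 64-bit integer with a zero high
+! word, so fxtod is exact; %f6/%f8 hold the low/high 16 bits of s2_limb
+! as doubles.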
+
+ mov 0,%g3 ! cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 ! s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pt %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 ! res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+! BEGIN LOOP
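+! Same pipeline as addmul_1, but the assembled product is subtracted:
+! subcc computes *res_ptr - p, and the addx below folds the resulting
+! borrow into the carry word %g3 for the next limb.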
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ add %g3,%g1,%g4 ! p += cy
+	subcc	%g5,%g4,%l2	! *res_ptr - p, borrow to %icc (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+	addx	%g3,0,%g3	! fold borrow into cy
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 ! res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 ! s1_ptr++
+ add %g3,%g1,%g4 ! p += cy
+	subcc	%g5,%g4,%l2	! *res_ptr - p, borrow to %icc (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+	addx	%g3,0,%g3	! fold borrow into cy
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 ! res_ptr++
+! END LOOP
+
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 ! p += cy
+	subxcc	%g5,%g4,%l2	! *res_ptr - p - borrow (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 ! p += cy
+	subxcc	%g5,%g4,%l2	! *res_ptr - p - borrow (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+	subxcc	%g5,%g4,%l2	! *res_ptr - p - borrow (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 ! p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 ! align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+	subxcc	%g5,%g4,%l2	! *res_ptr - p - borrow (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ ldx [%fp-32],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+	subxcc	%g5,%g4,%l2	! *res_ptr - p - borrow (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+L(yyy): add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+ add %g3,%g1,%g4 ! p += cy
+	subxcc	%g5,%g4,%l2	! *res_ptr - p - borrow (SUB2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 ! p16
+ ldx [%fp-32],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 ! p16
+ ldx [%fp-16],%g1 ! p0
+ sllx %g2,16,%g2 ! align p16
+ add %g2,%g1,%g1 ! add p16 to p0 (ADD1)
+ add %i0,4,%i0 ! res_ptr++
+
+L(ret): add %g3,%g1,%g4 ! p += cy
+	subxcc	%g5,%g4,%l2	! *res_ptr - p - borrow (SUB2)
+ srlx %g4,32,%g3
+ st %l2,[%i0-4]
+
+	addx	%g3,%g0,%g3	! fold final borrow into cy
+ ret
+	restore %g0,%g3,%o0	! side effect: put cy in return reg
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc64/add_n.asm b/mpn/sparc64/add_n.asm
new file mode 100644
index 000000000..1cac6cfc1
--- /dev/null
+++ b/mpn/sparc64/add_n.asm
@@ -0,0 +1,172 @@
+! SPARC v9 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1999 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_add_n)
+
+! 12 mem ops >= 12 cycles
+! 8 shift insn >= 8 cycles
+! 8 addccc, executing alone, +8 cycles
+! Unrolling is not mandatory; perhaps 2-way is best?
+! Put one ldx/stx and one s?lx per issue tuple; fill with pointer
+! arithmetic and loop control.
+! All in all, it runs at 5 cycles/limb
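+! Carry scheme: addccc consumes and sets the 32-bit carry in %icc (v9 has
+! no add-with-carry on the 64-bit condition codes), so each limb is added
+! twice: the real 64-bit add yields the result plus the carry out of bit
+! 31, and a dummy add of the two high halves into %g0 recreates the carry
+! out of bit 63 in %icc for the next limb.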
+
+ save %sp,-160,%sp
+
+ addcc %g0,%g0,%g0
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(there)
+ nop
+
+ ldx [%i1+0],%l0
+ ldx [%i2+0],%l4
+ ldx [%i1+8],%l1
+ ldx [%i2+8],%l5
+ ldx [%i1+16],%l2
+ ldx [%i2+16],%l6
+ ldx [%i1+24],%l3
+ ldx [%i2+24],%l7
+ add %i1,32,%i1
+ add %i2,32,%i2
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(skip)
+ nop
+ b L(loop1) ! jump instead of executing many NOPs
+ nop
+ ALIGN(32)
+!--------- Start main loop ---------
+L(loop1):
+ addccc %l0,%l4,%g1
+!-
+ srlx %l0,32,%o0
+ ldx [%i1+0],%l0
+!-
+ srlx %l4,32,%o4
+ ldx [%i2+0],%l4
+!-
+ addccc %o0,%o4,%g0
+!-
+ addccc %l1,%l5,%g2
+!-
+ srlx %l1,32,%o1
+ ldx [%i1+8],%l1
+!-
+ srlx %l5,32,%o5
+ ldx [%i2+8],%l5
+!-
+ addccc %o1,%o5,%g0
+!-
+ addccc %l2,%l6,%g3
+!-
+ srlx %l2,32,%o2
+ ldx [%i1+16],%l2
+!-
+ srlx %l6,32,%g5 ! asymmetry
+ ldx [%i2+16],%l6
+!-
+ addccc %o2,%g5,%g0
+!-
+ addccc %l3,%l7,%g4
+!-
+ srlx %l3,32,%o3
+ ldx [%i1+24],%l3
+ add %i1,32,%i1
+!-
+ srlx %l7,32,%o7
+ ldx [%i2+24],%l7
+ add %i2,32,%i2
+!-
+ addccc %o3,%o7,%g0
+!-
+ stx %g1,[%i0+0]
+!-
+ stx %g2,[%i0+8]
+!-
+ stx %g3,[%i0+16]
+ add %i3,-4,%i3
+!-
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+ brgez,pt %i3,L(loop1)
+ nop
+!--------- End main loop ---------
+L(skip):
+ addccc %l0,%l4,%g1
+ srlx %l0,32,%o0
+ srlx %l4,32,%o4
+ addccc %o0,%o4,%g0
+ addccc %l1,%l5,%g2
+ srlx %l1,32,%o1
+ srlx %l5,32,%o5
+ addccc %o1,%o5,%g0
+ addccc %l2,%l6,%g3
+ srlx %l2,32,%o2
+ srlx %l6,32,%g5 ! asymmetry
+ addccc %o2,%g5,%g0
+ addccc %l3,%l7,%g4
+ srlx %l3,32,%o3
+ srlx %l7,32,%o7
+ addccc %o3,%o7,%g0
+ stx %g1,[%i0+0]
+ stx %g2,[%i0+8]
+ stx %g3,[%i0+16]
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+L(there):
+ add %i3,4,%i3
+ brz,pt %i3,L(end)
+ nop
+
+L(loop2):
+ ldx [%i1+0],%l0
+ add %i1,8,%i1
+ ldx [%i2+0],%l4
+ add %i2,8,%i2
+ srlx %l0,32,%g2
+ srlx %l4,32,%g3
+ addccc %l0,%l4,%g1
+ addccc %g2,%g3,%g0
+ stx %g1,[%i0+0]
+ add %i0,8,%i0
+ add %i3,-1,%i3
+ brgz,pt %i3,L(loop2)
+ nop
+
+L(end): addc %g0,%g0,%i0
+ ret
+ restore
+EPILOGUE(mpn_add_n)
diff --git a/mpn/sparc64/addmul_1.asm b/mpn/sparc64/addmul_1.asm
new file mode 100644
index 000000000..6206d6363
--- /dev/null
+++ b/mpn/sparc64/addmul_1.asm
@@ -0,0 +1,86 @@
+! SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+! add the product to a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ save %sp,-160,%sp
+	sub %g0,%i2,%o7	! %o7 = -size, counts up toward zero
+	sllx %o7,3,%g5	! %g5 = -8*size (bytes)
+	sub %i1,%g5,%o3	! bias s1_ptr so [%o3+8*index] walks the vector
+	sub %i0,%g5,%o4	! bias res_ptr the same way
+ mov 0,%o0 ! zero cy_limb
+
+ srl %i3,0,%o1 ! extract low 32 bits of s2_limb
+ srlx %i3,32,%i3 ! extract high 32 bits of s2_limb
+ mov 1,%o2
+ sllx %o2,32,%o2 ! o2 = 0x100000000
+
+ ! hi !
+ ! mid-1 !
+ ! mid-2 !
+ ! lo !
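+! The diagram names the four 32x32 mulx partial products of the 64x64
+! multiply.  The mid products are summed with addcc; movcs then adds %o2
+! (2^32) into the hi product, placing the mid-sum carry at bit 96 of the
+! 128-bit result.  The low 64 bits absorb cy_limb and the old *res_ptr
+! limb; the high 64 bits become the new cy_limb.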
+L(loop):
+ sllx %o7,3,%g1
+ ldx [%o3+%g1],%g5
+ srl %g5,0,%i0 ! zero hi bits
+ srlx %g5,32,%g5
+ mulx %o1,%i0,%i4 ! lo product
+ mulx %i3,%i0,%i1 ! mid-1 product
+ mulx %o1,%g5,%l2 ! mid-2 product
+ mulx %i3,%g5,%i5 ! hi product
+ srlx %i4,32,%i0 ! extract high 32 bits of lo product...
+ add %i1,%i0,%i1 ! ...and add it to the mid-1 product
+ addcc %i1,%l2,%i1 ! add mid products
+ mov 0,%l0 ! we need the carry from that add...
+ movcs %xcc,%o2,%l0 ! ...compute it and...
+ add %i5,%l0,%i5 ! ...add to bit 32 of the hi product
+ sllx %i1,32,%i0 ! align low bits of mid product
+ srl %i4,0,%g5 ! zero high 32 bits of lo product
+ add %i0,%g5,%i0 ! combine into low 64 bits of result
+ srlx %i1,32,%i1 ! extract high bits of mid product...
+ add %i5,%i1,%i1 ! ...and add them to the high result
+ addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ add %i1,%g5,%i1
+ add %o7,1,%o7
+ ldx [%o4+%g1],%l1
+	addcc %l1,%i0,%i0	! add in *res_ptr limb
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ stx %i0,[%o4+%g1]
+ brnz %o7,L(loop)
+ add %i1,%g5,%o0 ! compute new cy_limb
+
+ mov %o0,%i0
+ ret
+ restore
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc64/copyi.asm b/mpn/sparc64/copyi.asm
new file mode 100644
index 000000000..92b2f3156
--- /dev/null
+++ b/mpn/sparc64/copyi.asm
@@ -0,0 +1,77 @@
+! SPARC v9 __mpn_copyi -- Copy a limb vector, incrementing.
+
+! Copyright (C) 1999 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rptr %o0
+! sptr %o1
+! n %o2
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_copyi)
+ add %o2,-8,%o2
+ brlz,pn %o2,L(skip)
+ nop
+
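+! 8-way unrolled copy loop; L(loop2) below mops up the size mod 8 limbs.
+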
+ ALIGN(16)
+L(loop1):
+ ldx [%o1+0],%g1
+ ldx [%o1+8],%g2
+ ldx [%o1+16],%g3
+ ldx [%o1+24],%g4
+ ldx [%o1+32],%g5
+ ldx [%o1+40],%o3
+ ldx [%o1+48],%o4
+ ldx [%o1+56],%o5
+ add %o1,64,%o1
+ stx %g1,[%o0+0]
+ stx %g2,[%o0+8]
+ stx %g3,[%o0+16]
+ stx %g4,[%o0+24]
+ stx %g5,[%o0+32]
+ stx %o3,[%o0+40]
+ stx %o4,[%o0+48]
+ stx %o5,[%o0+56]
+ add %o2,-8,%o2
+	brgez,pt %o2,L(loop1)
+ add %o0,64,%o0
+
+L(skip):
+ add %o2,8,%o2
+ brz,pt %o2,L(end)
+ nop
+
+L(loop2):
+ ldx [%o1],%g1
+ add %o1,8,%o1
+ add %o2,-1,%o2
+ stx %g1,[%o0]
+ add %o0,8,%o0
+ brgz,pt %o2,L(loop2)
+ nop
+
+L(end): retl
+ nop
+EPILOGUE(mpn_copyi)
diff --git a/mpn/sparc64/lshift.asm b/mpn/sparc64/lshift.asm
new file mode 100644
index 000000000..745c2e09b
--- /dev/null
+++ b/mpn/sparc64/lshift.asm
@@ -0,0 +1,97 @@
+! SPARC v9 __mpn_lshift -- Shift a limb vector left and store the result in
+! a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_lshift)
+ sllx %o2,3,%g1
+ add %o1,%g1,%o1 ! make %o1 point at end of src
+ ldx [%o1-8],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o0,%g1,%o0 ! make %o0 point at end of res
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ srlx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop
+ stx %g1,[%sp+79]
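+! %g1 now holds the bits shifted out of the most significant limb -- the
+! function result -- parked on the stack until the epilogue reloads it
+! into %o0.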
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ldx [%o1-16],%g3
+ add %o0,-8,%o0
+ add %o1,-8,%o1
+ add %g4,-1,%g4
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,L(loop0)
+ stx %o4,[%o0+0]
+
+L(0): brz,pn %o2,L(end)
+ nop
+
+L(loop1):
+ ldx [%o1-16],%g3
+ add %o0,-32,%o0
+ add %o2,-4,%o2
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-24],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+24]
+ srlx %g2,%o5,%g1
+
+ ldx [%o1-32],%g3
+ sllx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0+16]
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-40],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+8]
+ srlx %g2,%o5,%g1
+
+ add %o1,-32,%o1
+ or %g4,%g1,%g4
+ brnz,pt %o2,L(loop1)
+ stx %g4,[%o0+0]
+
+L(end): sllx %g2,%o3,%g2
+ stx %g2,[%o0-8]
+ retl
+ ldx [%sp+79],%o0
+EPILOGUE(mpn_lshift)
diff --git a/mpn/sparc64/mul_1.asm b/mpn/sparc64/mul_1.asm
new file mode 100644
index 000000000..573d18f88
--- /dev/null
+++ b/mpn/sparc64/mul_1.asm
@@ -0,0 +1,81 @@
+! SPARC v9 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ save %sp,-160,%sp
+ sub %g0,%i2,%o7
+ sllx %o7,3,%g5
+ sub %i1,%g5,%o3
+ sub %i0,%g5,%o4
+ mov 0,%o0 ! zero cy_limb
+
+ srl %i3,0,%o1 ! extract low 32 bits of s2_limb
+ srlx %i3,32,%i3 ! extract high 32 bits of s2_limb
+ mov 1,%o2
+ sllx %o2,32,%o2 ! o2 = 0x100000000
+
+ ! hi !
+ ! mid-1 !
+ ! mid-2 !
+ ! lo !
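+! Same four-partial-product scheme as addmul_1: mid products summed with
+! addcc, the mid-sum carry moved into bit 32 of the hi product via movcs,
+! low 64 bits plus cy_limb stored, high 64 bits carried on.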
+L(loop):
+ sllx %o7,3,%g1
+ ldx [%o3+%g1],%g5
+ srl %g5,0,%i0 ! zero hi bits
+ srlx %g5,32,%g5
+ mulx %o1,%i0,%i4 ! lo product
+ mulx %i3,%i0,%i1 ! mid-1 product
+ mulx %o1,%g5,%l2 ! mid-2 product
+ mulx %i3,%g5,%i5 ! hi product
+ srlx %i4,32,%i0 ! extract high 32 bits of lo product...
+ add %i1,%i0,%i1 ! ...and add it to the mid-1 product
+ addcc %i1,%l2,%i1 ! add mid products
+ mov 0,%l0 ! we need the carry from that add...
+ movcs %xcc,%o2,%l0 ! ...compute it and...
+ add %i5,%l0,%i5 ! ...add to bit 32 of the hi product
+ sllx %i1,32,%i0 ! align low bits of mid product
+ srl %i4,0,%g5 ! zero high 32 bits of lo product
+ add %i0,%g5,%i0 ! combine into low 64 bits of result
+ srlx %i1,32,%i1 ! extract high bits of mid product...
+ add %i5,%i1,%i1 ! ...and add them to the high result
+ addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ add %o7,1,%o7
+ stx %i0,[%o4+%g1]
+ brnz %o7,L(loop)
+ add %i1,%g5,%o0 ! compute new cy_limb
+
+ mov %o0,%i0
+ ret
+ restore
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc64/rshift.asm b/mpn/sparc64/rshift.asm
new file mode 100644
index 000000000..e78170801
--- /dev/null
+++ b/mpn/sparc64/rshift.asm
@@ -0,0 +1,94 @@
+! SPARC v9 __mpn_rshift -- Shift a limb vector right and store the result in
+! a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_rshift)
+ ldx [%o1],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ sllx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop
+ stx %g1,[%sp+79]
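+! %g1 now holds the bits shifted out of the least significant limb -- the
+! function result -- parked on the stack until the epilogue reloads it.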
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ldx [%o1+8],%g3
+ add %o0,8,%o0
+ add %o1,8,%o1
+ add %g4,-1,%g4
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,L(loop0)
+ stx %o4,[%o0-8]
+
+L(0): brz,pn %o2,L(end)
+ nop
+
+L(loop1):
+ ldx [%o1+8],%g3
+ add %o0,32,%o0
+ add %o2,-4,%o2
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+16],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-32]
+ sllx %g2,%o5,%g1
+
+ ldx [%o1+24],%g3
+ srlx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0-24]
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+32],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-16]
+ sllx %g2,%o5,%g1
+
+ add %o1,32,%o1
+ or %g4,%g1,%g4
+ brnz %o2,L(loop1)
+ stx %g4,[%o0-8]
+
+L(end): srlx %g2,%o3,%g2
+ stx %g2,[%o0-0]
+ retl
+ ldx [%sp+79],%o0
+EPILOGUE(mpn_rshift)
diff --git a/mpn/sparc64/sub_n.asm b/mpn/sparc64/sub_n.asm
new file mode 100644
index 000000000..df96e89b9
--- /dev/null
+++ b/mpn/sparc64/sub_n.asm
@@ -0,0 +1,172 @@
+! SPARC v9 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1999 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_sub_n)
+
+! 12 mem ops >= 12 cycles
+! 8 shift insn >= 8 cycles
+! 8 addccc, executing alone, +8 cycles
+! Unrolling is not mandatory; perhaps 2-way is best?
+! Put one ldx/stx and one s?lx per issue tuple; fill with pointer
+! arithmetic and loop control.
+! All in all, it runs at 5 cycles/limb
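+! Borrow scheme mirrors add_n: subccc consumes and sets the 32-bit carry
+! in %icc, so the dummy subtractions of the high halves into %g0 recreate
+! the borrow out of each full 64-bit limb difference for the next limb.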
+
+ save %sp,-160,%sp
+
+ addcc %g0,%g0,%g0
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(there)
+ nop
+
+ ldx [%i1+0],%l0
+ ldx [%i2+0],%l4
+ ldx [%i1+8],%l1
+ ldx [%i2+8],%l5
+ ldx [%i1+16],%l2
+ ldx [%i2+16],%l6
+ ldx [%i1+24],%l3
+ ldx [%i2+24],%l7
+ add %i1,32,%i1
+ add %i2,32,%i2
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(skip)
+ nop
+ b L(loop1) ! jump instead of executing many NOPs
+ nop
+ ALIGN(32)
+!--------- Start main loop ---------
+L(loop1):
+ subccc %l0,%l4,%g1
+!-
+ srlx %l0,32,%o0
+ ldx [%i1+0],%l0
+!-
+ srlx %l4,32,%o4
+ ldx [%i2+0],%l4
+!-
+ subccc %o0,%o4,%g0
+!-
+ subccc %l1,%l5,%g2
+!-
+ srlx %l1,32,%o1
+ ldx [%i1+8],%l1
+!-
+ srlx %l5,32,%o5
+ ldx [%i2+8],%l5
+!-
+ subccc %o1,%o5,%g0
+!-
+ subccc %l2,%l6,%g3
+!-
+ srlx %l2,32,%o2
+ ldx [%i1+16],%l2
+!-
+ srlx %l6,32,%g5 ! asymmetry
+ ldx [%i2+16],%l6
+!-
+ subccc %o2,%g5,%g0
+!-
+ subccc %l3,%l7,%g4
+!-
+ srlx %l3,32,%o3
+ ldx [%i1+24],%l3
+ add %i1,32,%i1
+!-
+ srlx %l7,32,%o7
+ ldx [%i2+24],%l7
+ add %i2,32,%i2
+!-
+ subccc %o3,%o7,%g0
+!-
+ stx %g1,[%i0+0]
+!-
+ stx %g2,[%i0+8]
+!-
+ stx %g3,[%i0+16]
+ add %i3,-4,%i3
+!-
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+ brgez,pt %i3,L(loop1)
+ nop
+!--------- End main loop ---------
+L(skip):
+ subccc %l0,%l4,%g1
+ srlx %l0,32,%o0
+ srlx %l4,32,%o4
+ subccc %o0,%o4,%g0
+ subccc %l1,%l5,%g2
+ srlx %l1,32,%o1
+ srlx %l5,32,%o5
+ subccc %o1,%o5,%g0
+ subccc %l2,%l6,%g3
+ srlx %l2,32,%o2
+ srlx %l6,32,%g5 ! asymmetry
+ subccc %o2,%g5,%g0
+ subccc %l3,%l7,%g4
+ srlx %l3,32,%o3
+ srlx %l7,32,%o7
+ subccc %o3,%o7,%g0
+ stx %g1,[%i0+0]
+ stx %g2,[%i0+8]
+ stx %g3,[%i0+16]
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+L(there):
+ add %i3,4,%i3
+ brz,pt %i3,L(end)
+ nop
+
+L(loop2):
+ ldx [%i1+0],%l0
+ add %i1,8,%i1
+ ldx [%i2+0],%l4
+ add %i2,8,%i2
+ srlx %l0,32,%g2
+ srlx %l4,32,%g3
+ subccc %l0,%l4,%g1
+ subccc %g2,%g3,%g0
+ stx %g1,[%i0+0]
+ add %i0,8,%i0
+ add %i3,-1,%i3
+ brgz,pt %i3,L(loop2)
+ nop
+
+L(end): addc %g0,%g0,%i0
+ ret
+ restore
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/sparc64/submul_1.asm b/mpn/sparc64/submul_1.asm
new file mode 100644
index 000000000..d8a8002b1
--- /dev/null
+++ b/mpn/sparc64/submul_1.asm
@@ -0,0 +1,86 @@
+! SPARC v9 __mpn_submul_1 -- Multiply a limb vector with a single limb and
+! subtract the product from a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ save %sp,-160,%sp
+ sub %g0,%i2,%o7
+ sllx %o7,3,%g5
+ sub %i1,%g5,%o3
+ sub %i0,%g5,%o4
+ mov 0,%o0 ! zero cy_limb
+
+ srl %i3,0,%o1 ! extract low 32 bits of s2_limb
+ srlx %i3,32,%i3 ! extract high 32 bits of s2_limb
+ mov 1,%o2
+ sllx %o2,32,%o2 ! o2 = 0x100000000
+
+ ! hi !
+ ! mid-1 !
+ ! mid-2 !
+ ! lo !
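+! Same four-partial-product scheme as addmul_1, except the assembled
+! product is subtracted from the *res_ptr limb (subcc below) and the
+! borrow is added into the new cy_limb.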
+L(loop):
+ sllx %o7,3,%g1
+ ldx [%o3+%g1],%g5
+ srl %g5,0,%i0 ! zero hi bits
+ srlx %g5,32,%g5
+ mulx %o1,%i0,%i4 ! lo product
+ mulx %i3,%i0,%i1 ! mid-1 product
+ mulx %o1,%g5,%l2 ! mid-2 product
+ mulx %i3,%g5,%i5 ! hi product
+ srlx %i4,32,%i0 ! extract high 32 bits of lo product...
+ add %i1,%i0,%i1 ! ...and add it to the mid-1 product
+ addcc %i1,%l2,%i1 ! add mid products
+ mov 0,%l0 ! we need the carry from that add...
+ movcs %xcc,%o2,%l0 ! ...compute it and...
+ add %i5,%l0,%i5 ! ...add to bit 32 of the hi product
+ sllx %i1,32,%i0 ! align low bits of mid product
+ srl %i4,0,%g5 ! zero high 32 bits of lo product
+ add %i0,%g5,%i0 ! combine into low 64 bits of result
+ srlx %i1,32,%i1 ! extract high bits of mid product...
+ add %i5,%i1,%i1 ! ...and add them to the high result
+ addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ add %i1,%g5,%i1
+ add %o7,1,%o7
+ ldx [%o4+%g1],%l1
+	subcc %l1,%i0,%i0	! subtract product from *res_ptr limb
+ mov 0,%g5
+ movcs %xcc,1,%g5
+ stx %i0,[%o4+%g1]
+ brnz %o7,L(loop)
+ add %i1,%g5,%o0 ! compute new cy_limb
+
+ mov %o0,%i0
+ ret
+ restore
+EPILOGUE(mpn_submul_1)