diff options
Diffstat (limited to 'rts/gmp/mpn/sparc32')
-rw-r--r-- | rts/gmp/mpn/sparc32/README | 36 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/add_n.asm | 236 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/addmul_1.asm | 146 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/lshift.asm | 97 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/mul_1.asm | 137 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/rshift.asm | 93 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/sub_n.asm | 326 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/submul_1.asm | 146 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/udiv_fp.asm | 158 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/udiv_nfp.asm | 193 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/umul.asm | 68 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v8/addmul_1.asm | 122 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v8/mul_1.asm | 103 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v8/submul_1.asm | 58 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm | 122 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v8/umul.asm | 31 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v9/README | 4 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v9/addmul_1.asm | 288 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v9/gmp-mparam.h | 69 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v9/mul_1.asm | 267 | ||||
-rw-r--r-- | rts/gmp/mpn/sparc32/v9/submul_1.asm | 291 |
21 files changed, 0 insertions, 2991 deletions
diff --git a/rts/gmp/mpn/sparc32/README b/rts/gmp/mpn/sparc32/README deleted file mode 100644 index 7c19df7bc4..0000000000 --- a/rts/gmp/mpn/sparc32/README +++ /dev/null @@ -1,36 +0,0 @@ -This directory contains mpn functions for various SPARC chips. Code that -runs only on version 8 SPARC implementations is in the v8 subdirectory. - -RELEVANT OPTIMIZATION ISSUES - - Load and Store timing - -On most early SPARC implementations, the ST instruction takes multiple -cycles, while a STD takes just a single cycle more than an ST. For the CPUs -in SPARCstation I and II, the times are 3 and 4 cycles, respectively. -Therefore, combining two ST instructions into a STD when possible is a -significant optimization. - -Later SPARC implementations have single cycle ST. - -For SuperSPARC, we can perform just one memory instruction per cycle, even -if up to two integer instructions can be executed in its pipeline. For -programs that perform so many memory operations that there are not enough -non-memory operations to issue in parallel with all memory operations, using -LDD and STD when possible helps. - -STATUS - -1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5 - cycles/limb asymptotically. We could optimize speed for special counts - by using ADDXCC. - -2. On a SuperSPARC, mpn_add_n and mpn_sub_n run at 2.5 cycles/limb, or 2 - cycles/limb asymptotically. - -3. mpn_mul_1 runs at what is believed to be optimal speed. - -4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a - cycle by avoiding one of the add instructions. See a29k/addmul_1. - -The speed of the code for other SPARC implementations is uncertain. diff --git a/rts/gmp/mpn/sparc32/add_n.asm b/rts/gmp/mpn/sparc32/add_n.asm deleted file mode 100644 index 5f1d00c0e0..0000000000 --- a/rts/gmp/mpn/sparc32/add_n.asm +++ /dev/null @@ -1,236 +0,0 @@ -dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store -dnl sum in a third limb vector. 
- -dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -define(res_ptr,%o0) -define(s1_ptr,%o1) -define(s2_ptr,%o2) -define(n,%o3) - -ASM_START() -PROLOGUE(mpn_add_n) - xor s2_ptr,res_ptr,%g1 - andcc %g1,4,%g0 - bne L(1) C branch if alignment differs - nop -C ** V1a ** -L(0): andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0 - be L(v1) C if no, branch - nop -C Add least significant limb separately to align res_ptr and s2_ptr - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add n,-1,n - addcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr -L(v1): addx %g0,%g0,%o4 C save cy in register - cmp n,2 C if n < 2 ... - bl L(end2) C ... 
branch to tail code - subcc %g0,%o4,%g0 C restore cy - - ld [s1_ptr+0],%g4 - addcc n,-10,n - ld [s1_ptr+4],%g1 - ldd [s2_ptr+0],%g2 - blt L(fin1) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 8 limbs until less than 8 limbs remain -L(loop1): - addxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 - addxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] - addxcc %g4,%g2,%o4 - ld [s1_ptr+16],%g4 - addxcc %g1,%g3,%o5 - ld [s1_ptr+20],%g1 - ldd [s2_ptr+16],%g2 - std %o4,[res_ptr+8] - addxcc %g4,%g2,%o4 - ld [s1_ptr+24],%g4 - addxcc %g1,%g3,%o5 - ld [s1_ptr+28],%g1 - ldd [s2_ptr+24],%g2 - std %o4,[res_ptr+16] - addxcc %g4,%g2,%o4 - ld [s1_ptr+32],%g4 - addxcc %g1,%g3,%o5 - ld [s1_ptr+36],%g1 - ldd [s2_ptr+32],%g2 - std %o4,[res_ptr+24] - addx %g0,%g0,%o4 C save cy in register - addcc n,-8,n - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge L(loop1) - subcc %g0,%o4,%g0 C restore cy - -L(fin1): - addcc n,8-2,n - blt L(end1) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 2 limbs until less than 2 limbs remain -L(loope1): - addxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 - addxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] - addx %g0,%g0,%o4 C save cy in register - addcc n,-2,n - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge L(loope1) - subcc %g0,%o4,%g0 C restore cy -L(end1): - addxcc %g4,%g2,%o4 - addxcc %g1,%g3,%o5 - std %o4,[res_ptr+0] - addx %g0,%g0,%o4 C save cy in register - - andcc n,1,%g0 - be L(ret1) - subcc %g0,%o4,%g0 C restore cy -C Add last limb - ld [s1_ptr+8],%g4 - ld [s2_ptr+8],%g2 - addxcc %g4,%g2,%o4 - st %o4,[res_ptr+8] - -L(ret1): - retl - addx %g0,%g0,%o0 C return carry-out from most sign. 
limb - -L(1): xor s1_ptr,res_ptr,%g1 - andcc %g1,4,%g0 - bne L(2) - nop -C ** V1b ** - mov s2_ptr,%g1 - mov s1_ptr,s2_ptr - b L(0) - mov %g1,s1_ptr - -C ** V2 ** -C If we come here, the alignment of s1_ptr and res_ptr as well as the -C alignment of s2_ptr and res_ptr differ. Since there are only two ways -C things can be aligned (that we care about) we now know that the alignment -C of s1_ptr and s2_ptr are the same. - -L(2): cmp n,1 - be L(jone) - nop - andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0 - be L(v2) C if no, branch - nop -C Add least significant limb separately to align s1_ptr and s2_ptr - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add n,-1,n - addcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr - -L(v2): addx %g0,%g0,%o4 C save cy in register - addcc n,-8,n - blt L(fin2) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 8 limbs until less than 8 limbs remain -L(loop2): - ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 - addxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] - addxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] - ldd [s1_ptr+8],%g2 - ldd [s2_ptr+8],%o4 - addxcc %g2,%o4,%g2 - st %g2,[res_ptr+8] - addxcc %g3,%o5,%g3 - st %g3,[res_ptr+12] - ldd [s1_ptr+16],%g2 - ldd [s2_ptr+16],%o4 - addxcc %g2,%o4,%g2 - st %g2,[res_ptr+16] - addxcc %g3,%o5,%g3 - st %g3,[res_ptr+20] - ldd [s1_ptr+24],%g2 - ldd [s2_ptr+24],%o4 - addxcc %g2,%o4,%g2 - st %g2,[res_ptr+24] - addxcc %g3,%o5,%g3 - st %g3,[res_ptr+28] - addx %g0,%g0,%o4 C save cy in register - addcc n,-8,n - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge L(loop2) - subcc %g0,%o4,%g0 C restore cy - -L(fin2): - addcc n,8-2,n - blt L(end2) - subcc %g0,%o4,%g0 C restore cy -L(loope2): - ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 - addxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] - addxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] - addx %g0,%g0,%o4 C save cy in register - addcc n,-2,n - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge L(loope2) - subcc 
%g0,%o4,%g0 C restore cy -L(end2): - andcc n,1,%g0 - be L(ret2) - subcc %g0,%o4,%g0 C restore cy -C Add last limb -L(jone): - ld [s1_ptr],%g4 - ld [s2_ptr],%g2 - addxcc %g4,%g2,%o4 - st %o4,[res_ptr] - -L(ret2): - retl - addx %g0,%g0,%o0 C return carry-out from most sign. limb -EPILOGUE(mpn_add_n) diff --git a/rts/gmp/mpn/sparc32/addmul_1.asm b/rts/gmp/mpn/sparc32/addmul_1.asm deleted file mode 100644 index 80c94e4251..0000000000 --- a/rts/gmp/mpn/sparc32/addmul_1.asm +++ /dev/null @@ -1,146 +0,0 @@ -dnl SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the -dnl result to a second limb vector. - -dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr o0 -C s1_ptr o1 -C size o2 -C s2_limb o3 - -ASM_START() -PROLOGUE(mpn_addmul_1) - C Make S1_PTR and RES_PTR point at the end of their blocks - C and put (- 4 x SIZE) in index/loop counter. 
- sll %o2,2,%o2 - add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval - add %o1,%o2,%o1 - sub %g0,%o2,%o2 - - cmp %o3,0xfff - bgu L(large) - nop - - ld [%o1+%o2],%o5 - mov 0,%o0 - b L(0) - add %o4,-4,%o4 -L(loop0): - addcc %o5,%g1,%g1 - ld [%o1+%o2],%o5 - addx %o0,%g0,%o0 - st %g1,[%o4+%o2] -L(0): wr %g0,%o3,%y - sra %o5,31,%g2 - and %o3,%g2,%g2 - andcc %g1,0,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,0,%g1 - sra %g1,20,%g4 - sll %g1,12,%g1 - rd %y,%g3 - srl %g3,20,%g3 - or %g1,%g3,%g1 - - addcc %g1,%o0,%g1 - addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb - addcc %o2,4,%o2 C loop counter - bne L(loop0) - ld [%o4+%o2],%o5 - - addcc %o5,%g1,%g1 - addx %o0,%g0,%o0 - retl - st %g1,[%o4+%o2] - -L(large): - ld [%o1+%o2],%o5 - mov 0,%o0 - sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0 - b L(1) - add %o4,-4,%o4 -L(loop): - addcc %o5,%g3,%g3 - ld [%o1+%o2],%o5 - addx %o0,%g0,%o0 - st %g3,[%o4+%o2] -L(1): wr %g0,%o5,%y - and %o5,%g4,%g2 - andcc %g0,%g0,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%g0,%g1 - rd %y,%g3 - addcc %g3,%o0,%g3 - addx %g2,%g1,%o0 - addcc %o2,4,%o2 - bne L(loop) - ld [%o4+%o2],%o5 - 
- addcc %o5,%g3,%g3 - addx %o0,%g0,%o0 - retl - st %g3,[%o4+%o2] -EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/sparc32/lshift.asm b/rts/gmp/mpn/sparc32/lshift.asm deleted file mode 100644 index 529733ac2d..0000000000 --- a/rts/gmp/mpn/sparc32/lshift.asm +++ /dev/null @@ -1,97 +0,0 @@ -dnl SPARC mpn_lshift -- Shift a number left. -dnl - -dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. 
- - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr %o0 -C src_ptr %o1 -C size %o2 -C cnt %o3 - -ASM_START() -PROLOGUE(mpn_lshift) - sll %o2,2,%g1 - add %o1,%g1,%o1 C make %o1 point at end of src - ld [%o1-4],%g2 C load first limb - sub %g0,%o3,%o5 C negate shift count - add %o0,%g1,%o0 C make %o0 point at end of res - add %o2,-1,%o2 - andcc %o2,4-1,%g4 C number of limbs in first loop - srl %g2,%o5,%g1 C compute function result - be L(0) C if multiple of 4 limbs, skip first loop - st %g1,[%sp+80] - - sub %o2,%g4,%o2 C adjust count for main loop - -L(loop0): - ld [%o1-8],%g3 - add %o0,-4,%o0 - add %o1,-4,%o1 - addcc %g4,-1,%g4 - sll %g2,%o3,%o4 - srl %g3,%o5,%g1 - mov %g3,%g2 - or %o4,%g1,%o4 - bne L(loop0) - st %o4,[%o0+0] - -L(0): tst %o2 - be L(end) - nop - -L(loop): - ld [%o1-8],%g3 - add %o0,-16,%o0 - addcc %o2,-4,%o2 - sll %g2,%o3,%o4 - srl %g3,%o5,%g1 - - ld [%o1-12],%g2 - sll %g3,%o3,%g4 - or %o4,%g1,%o4 - st %o4,[%o0+12] - srl %g2,%o5,%g1 - - ld [%o1-16],%g3 - sll %g2,%o3,%o4 - or %g4,%g1,%g4 - st %g4,[%o0+8] - srl %g3,%o5,%g1 - - ld [%o1-20],%g2 - sll %g3,%o3,%g4 - or %o4,%g1,%o4 - st %o4,[%o0+4] - srl %g2,%o5,%g1 - - add %o1,-16,%o1 - or %g4,%g1,%g4 - bne L(loop) - st %g4,[%o0+0] - -L(end): sll %g2,%o3,%g2 - st %g2,[%o0-4] - retl - ld [%sp+80],%o0 -EPILOGUE(mpn_lshift) diff --git a/rts/gmp/mpn/sparc32/mul_1.asm b/rts/gmp/mpn/sparc32/mul_1.asm deleted file mode 100644 index e5fedeabaa..0000000000 --- a/rts/gmp/mpn/sparc32/mul_1.asm +++ /dev/null @@ -1,137 +0,0 @@ -dnl SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store -dnl the result in a second limb vector. - -dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
- -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr o0 -C s1_ptr o1 -C size o2 -C s2_limb o3 - -ASM_START() -PROLOGUE(mpn_mul_1) - C Make S1_PTR and RES_PTR point at the end of their blocks - C and put (- 4 x SIZE) in index/loop counter. 
- sll %o2,2,%o2 - add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval - add %o1,%o2,%o1 - sub %g0,%o2,%o2 - - cmp %o3,0xfff - bgu L(large) - nop - - ld [%o1+%o2],%o5 - mov 0,%o0 - b L(0) - add %o4,-4,%o4 -L(loop0): - st %g1,[%o4+%o2] -L(0): wr %g0,%o3,%y - sra %o5,31,%g2 - and %o3,%g2,%g2 - andcc %g1,0,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,0,%g1 - sra %g1,20,%g4 - sll %g1,12,%g1 - rd %y,%g3 - srl %g3,20,%g3 - or %g1,%g3,%g1 - - addcc %g1,%o0,%g1 - addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb - addcc %o2,4,%o2 C loop counter - bne,a L(loop0) - ld [%o1+%o2],%o5 - - retl - st %g1,[%o4+%o2] - - -L(large): - ld [%o1+%o2],%o5 - mov 0,%o0 - sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0 - b L(1) - add %o4,-4,%o4 -L(loop): - st %g3,[%o4+%o2] -L(1): wr %g0,%o5,%y - and %o5,%g4,%g2 C g2 = S1_LIMB iff S2_LIMB < 0, else 0 - andcc %g0,%g0,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%g0,%g1 - rd %y,%g3 - addcc %g3,%o0,%g3 - addx %g2,%g1,%o0 C add sign-compensation and cy to hi limb - addcc %o2,4,%o2 C loop counter - bne,a L(loop) - ld [%o1+%o2],%o5 - - retl - st %g3,[%o4+%o2] -EPILOGUE(mpn_mul_1) diff 
--git a/rts/gmp/mpn/sparc32/rshift.asm b/rts/gmp/mpn/sparc32/rshift.asm deleted file mode 100644 index 9187dbaa6f..0000000000 --- a/rts/gmp/mpn/sparc32/rshift.asm +++ /dev/null @@ -1,93 +0,0 @@ -dnl SPARC mpn_rshift -- Shift a number right. - -dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. 
- - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr %o0 -C src_ptr %o1 -C size %o2 -C cnt %o3 - -ASM_START() -PROLOGUE(mpn_rshift) - ld [%o1],%g2 C load first limb - sub %g0,%o3,%o5 C negate shift count - add %o2,-1,%o2 - andcc %o2,4-1,%g4 C number of limbs in first loop - sll %g2,%o5,%g1 C compute function result - be L(0) C if multiple of 4 limbs, skip first loop - st %g1,[%sp+80] - - sub %o2,%g4,%o2 C adjust count for main loop - -L(loop0): - ld [%o1+4],%g3 - add %o0,4,%o0 - add %o1,4,%o1 - addcc %g4,-1,%g4 - srl %g2,%o3,%o4 - sll %g3,%o5,%g1 - mov %g3,%g2 - or %o4,%g1,%o4 - bne L(loop0) - st %o4,[%o0-4] - -L(0): tst %o2 - be L(end) - nop - -L(loop): - ld [%o1+4],%g3 - add %o0,16,%o0 - addcc %o2,-4,%o2 - srl %g2,%o3,%o4 - sll %g3,%o5,%g1 - - ld [%o1+8],%g2 - srl %g3,%o3,%g4 - or %o4,%g1,%o4 - st %o4,[%o0-16] - sll %g2,%o5,%g1 - - ld [%o1+12],%g3 - srl %g2,%o3,%o4 - or %g4,%g1,%g4 - st %g4,[%o0-12] - sll %g3,%o5,%g1 - - ld [%o1+16],%g2 - srl %g3,%o3,%g4 - or %o4,%g1,%o4 - st %o4,[%o0-8] - sll %g2,%o5,%g1 - - add %o1,16,%o1 - or %g4,%g1,%g4 - bne L(loop) - st %g4,[%o0-4] - -L(end): srl %g2,%o3,%g2 - st %g2,[%o0-0] - retl - ld [%sp+80],%o0 -EPILOGUE(mpn_rshift) diff --git a/rts/gmp/mpn/sparc32/sub_n.asm b/rts/gmp/mpn/sparc32/sub_n.asm deleted file mode 100644 index 071909a1b6..0000000000 --- a/rts/gmp/mpn/sparc32/sub_n.asm +++ /dev/null @@ -1,326 +0,0 @@ -dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and -dnl store difference in a third limb vector. - -dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. 
- -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -define(res_ptr,%o0) -define(s1_ptr,%o1) -define(s2_ptr,%o2) -define(n,%o3) - -ASM_START() -PROLOGUE(mpn_sub_n) - xor s2_ptr,res_ptr,%g1 - andcc %g1,4,%g0 - bne L(1) C branch if alignment differs - nop -C ** V1a ** - andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0 - be L(v1) C if no, branch - nop -C Add least significant limb separately to align res_ptr and s2_ptr - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add n,-1,n - subcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr -L(v1): addx %g0,%g0,%o4 C save cy in register - cmp n,2 C if n < 2 ... - bl L(end2) C ... 
branch to tail code - subcc %g0,%o4,%g0 C restore cy - - ld [s1_ptr+0],%g4 - addcc n,-10,n - ld [s1_ptr+4],%g1 - ldd [s2_ptr+0],%g2 - blt L(fin1) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 8 limbs until less than 8 limbs remain -L(loop1): - subxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 - subxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] - subxcc %g4,%g2,%o4 - ld [s1_ptr+16],%g4 - subxcc %g1,%g3,%o5 - ld [s1_ptr+20],%g1 - ldd [s2_ptr+16],%g2 - std %o4,[res_ptr+8] - subxcc %g4,%g2,%o4 - ld [s1_ptr+24],%g4 - subxcc %g1,%g3,%o5 - ld [s1_ptr+28],%g1 - ldd [s2_ptr+24],%g2 - std %o4,[res_ptr+16] - subxcc %g4,%g2,%o4 - ld [s1_ptr+32],%g4 - subxcc %g1,%g3,%o5 - ld [s1_ptr+36],%g1 - ldd [s2_ptr+32],%g2 - std %o4,[res_ptr+24] - addx %g0,%g0,%o4 C save cy in register - addcc n,-8,n - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge L(loop1) - subcc %g0,%o4,%g0 C restore cy - -L(fin1): - addcc n,8-2,n - blt L(end1) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 2 limbs until less than 2 limbs remain -L(loope1): - subxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 - subxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] - addx %g0,%g0,%o4 C save cy in register - addcc n,-2,n - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge L(loope1) - subcc %g0,%o4,%g0 C restore cy -L(end1): - subxcc %g4,%g2,%o4 - subxcc %g1,%g3,%o5 - std %o4,[res_ptr+0] - addx %g0,%g0,%o4 C save cy in register - - andcc n,1,%g0 - be L(ret1) - subcc %g0,%o4,%g0 C restore cy -C Add last limb - ld [s1_ptr+8],%g4 - ld [s2_ptr+8],%g2 - subxcc %g4,%g2,%o4 - st %o4,[res_ptr+8] - -L(ret1): - retl - addx %g0,%g0,%o0 C return carry-out from most sign. limb - -L(1): xor s1_ptr,res_ptr,%g1 - andcc %g1,4,%g0 - bne L(2) - nop -C ** V1b ** - andcc res_ptr,4,%g0 C res_ptr unaligned? 
Side effect: cy=0 - be L(v1b) C if no, branch - nop -C Add least significant limb separately to align res_ptr and s1_ptr - ld [s2_ptr],%g4 - add s2_ptr,4,s2_ptr - ld [s1_ptr],%g2 - add s1_ptr,4,s1_ptr - add n,-1,n - subcc %g2,%g4,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr -L(v1b): addx %g0,%g0,%o4 C save cy in register - cmp n,2 C if n < 2 ... - bl L(end2) C ... branch to tail code - subcc %g0,%o4,%g0 C restore cy - - ld [s2_ptr+0],%g4 - addcc n,-10,n - ld [s2_ptr+4],%g1 - ldd [s1_ptr+0],%g2 - blt L(fin1b) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 8 limbs until less than 8 limbs remain -L(loop1b): - subxcc %g2,%g4,%o4 - ld [s2_ptr+8],%g4 - subxcc %g3,%g1,%o5 - ld [s2_ptr+12],%g1 - ldd [s1_ptr+8],%g2 - std %o4,[res_ptr+0] - subxcc %g2,%g4,%o4 - ld [s2_ptr+16],%g4 - subxcc %g3,%g1,%o5 - ld [s2_ptr+20],%g1 - ldd [s1_ptr+16],%g2 - std %o4,[res_ptr+8] - subxcc %g2,%g4,%o4 - ld [s2_ptr+24],%g4 - subxcc %g3,%g1,%o5 - ld [s2_ptr+28],%g1 - ldd [s1_ptr+24],%g2 - std %o4,[res_ptr+16] - subxcc %g2,%g4,%o4 - ld [s2_ptr+32],%g4 - subxcc %g3,%g1,%o5 - ld [s2_ptr+36],%g1 - ldd [s1_ptr+32],%g2 - std %o4,[res_ptr+24] - addx %g0,%g0,%o4 C save cy in register - addcc n,-8,n - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge L(loop1b) - subcc %g0,%o4,%g0 C restore cy - -L(fin1b): - addcc n,8-2,n - blt L(end1b) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 2 limbs until less than 2 limbs remain -L(loope1b): - subxcc %g2,%g4,%o4 - ld [s2_ptr+8],%g4 - subxcc %g3,%g1,%o5 - ld [s2_ptr+12],%g1 - ldd [s1_ptr+8],%g2 - std %o4,[res_ptr+0] - addx %g0,%g0,%o4 C save cy in register - addcc n,-2,n - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge L(loope1b) - subcc %g0,%o4,%g0 C restore cy -L(end1b): - subxcc %g2,%g4,%o4 - subxcc %g3,%g1,%o5 - std %o4,[res_ptr+0] - addx %g0,%g0,%o4 C save cy in register - - andcc n,1,%g0 - be L(ret1b) - subcc %g0,%o4,%g0 C restore cy -C Add last limb - ld [s2_ptr+8],%g4 - ld [s1_ptr+8],%g2 - subxcc 
%g2,%g4,%o4 - st %o4,[res_ptr+8] - -L(ret1b): - retl - addx %g0,%g0,%o0 C return carry-out from most sign. limb - -C ** V2 ** -C If we come here, the alignment of s1_ptr and res_ptr as well as the -C alignment of s2_ptr and res_ptr differ. Since there are only two ways -C things can be aligned (that we care about) we now know that the alignment -C of s1_ptr and s2_ptr are the same. - -L(2): cmp n,1 - be L(jone) - nop - andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0 - be L(v2) C if no, branch - nop -C Add least significant limb separately to align s1_ptr and s2_ptr - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add n,-1,n - subcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr - -L(v2): addx %g0,%g0,%o4 C save cy in register - addcc n,-8,n - blt L(fin2) - subcc %g0,%o4,%g0 C restore cy -C Add blocks of 8 limbs until less than 8 limbs remain -L(loop2): - ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 - subxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] - subxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] - ldd [s1_ptr+8],%g2 - ldd [s2_ptr+8],%o4 - subxcc %g2,%o4,%g2 - st %g2,[res_ptr+8] - subxcc %g3,%o5,%g3 - st %g3,[res_ptr+12] - ldd [s1_ptr+16],%g2 - ldd [s2_ptr+16],%o4 - subxcc %g2,%o4,%g2 - st %g2,[res_ptr+16] - subxcc %g3,%o5,%g3 - st %g3,[res_ptr+20] - ldd [s1_ptr+24],%g2 - ldd [s2_ptr+24],%o4 - subxcc %g2,%o4,%g2 - st %g2,[res_ptr+24] - subxcc %g3,%o5,%g3 - st %g3,[res_ptr+28] - addx %g0,%g0,%o4 C save cy in register - addcc n,-8,n - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge L(loop2) - subcc %g0,%o4,%g0 C restore cy - -L(fin2): - addcc n,8-2,n - blt L(end2) - subcc %g0,%o4,%g0 C restore cy -L(loope2): - ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 - subxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] - subxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] - addx %g0,%g0,%o4 C save cy in register - addcc n,-2,n - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge L(loope2) - subcc %g0,%o4,%g0 C restore cy -L(end2): 
- andcc n,1,%g0 - be L(ret2) - subcc %g0,%o4,%g0 C restore cy -C Add last limb -L(jone): - ld [s1_ptr],%g4 - ld [s2_ptr],%g2 - subxcc %g4,%g2,%o4 - st %o4,[res_ptr] - -L(ret2): - retl - addx %g0,%g0,%o0 C return carry-out from most sign. limb -EPILOGUE(mpn_sub_n) diff --git a/rts/gmp/mpn/sparc32/submul_1.asm b/rts/gmp/mpn/sparc32/submul_1.asm deleted file mode 100644 index 12abd844ce..0000000000 --- a/rts/gmp/mpn/sparc32/submul_1.asm +++ /dev/null @@ -1,146 +0,0 @@ -dnl SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract -dnl the result from a second limb vector. - -dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr o0 -C s1_ptr o1 -C size o2 -C s2_limb o3 - -ASM_START() -PROLOGUE(mpn_submul_1) - C Make S1_PTR and RES_PTR point at the end of their blocks - C and put (- 4 x SIZE) in index/loop counter. 
- sll %o2,2,%o2 - add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval - add %o1,%o2,%o1 - sub %g0,%o2,%o2 - - cmp %o3,0xfff - bgu L(large) - nop - - ld [%o1+%o2],%o5 - mov 0,%o0 - b L(0) - add %o4,-4,%o4 -L(loop0): - subcc %o5,%g1,%g1 - ld [%o1+%o2],%o5 - addx %o0,%g0,%o0 - st %g1,[%o4+%o2] -L(0): wr %g0,%o3,%y - sra %o5,31,%g2 - and %o3,%g2,%g2 - andcc %g1,0,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,0,%g1 - sra %g1,20,%g4 - sll %g1,12,%g1 - rd %y,%g3 - srl %g3,20,%g3 - or %g1,%g3,%g1 - - addcc %g1,%o0,%g1 - addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb - addcc %o2,4,%o2 C loop counter - bne L(loop0) - ld [%o4+%o2],%o5 - - subcc %o5,%g1,%g1 - addx %o0,%g0,%o0 - retl - st %g1,[%o4+%o2] - -L(large): - ld [%o1+%o2],%o5 - mov 0,%o0 - sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0 - b L(1) - add %o4,-4,%o4 -L(loop): - subcc %o5,%g3,%g3 - ld [%o1+%o2],%o5 - addx %o0,%g0,%o0 - st %g3,[%o4+%o2] -L(1): wr %g0,%o5,%y - and %o5,%g4,%g2 - andcc %g0,%g0,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%g0,%g1 - rd %y,%g3 - addcc %g3,%o0,%g3 - addx %g2,%g1,%o0 - addcc %o2,4,%o2 - bne L(loop) - ld [%o4+%o2],%o5 - 
- subcc %o5,%g3,%g3 - addx %o0,%g0,%o0 - retl - st %g3,[%o4+%o2] -EPILOGUE(mpn_submul_1) diff --git a/rts/gmp/mpn/sparc32/udiv_fp.asm b/rts/gmp/mpn/sparc32/udiv_fp.asm deleted file mode 100644 index e340e147d2..0000000000 --- a/rts/gmp/mpn/sparc32/udiv_fp.asm +++ /dev/null @@ -1,158 +0,0 @@ -dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h. -dnl This is for v7 CPUs with a floating-point unit. - -dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. 
- - -include(`../config.m4') - -C INPUT PARAMETERS -C rem_ptr i0 -C n1 i1 -C n0 i2 -C d i3 - -ASM_START() - -ifdef(`PIC', -` TEXT -L(getpc): - retl - nop') - - TEXT - ALIGN(8) -L(C0): .double 0r4294967296 -L(C1): .double 0r2147483648 - -PROLOGUE(mpn_udiv_qrnnd) - save %sp,-104,%sp - st %i1,[%fp-8] - ld [%fp-8],%f10 - -ifdef(`PIC', -`L(pc): call L(getpc) C put address of this insn in %o7 - ldd [%o7+L(C0)-L(pc)],%f8', -` sethi %hi(L(C0)),%o7 - ldd [%o7+%lo(L(C0))],%f8') - - fitod %f10,%f4 - cmp %i1,0 - bge L(248) - mov %i0,%i5 - faddd %f4,%f8,%f4 -L(248): - st %i2,[%fp-8] - ld [%fp-8],%f10 - fmuld %f4,%f8,%f6 - cmp %i2,0 - bge L(249) - fitod %f10,%f2 - faddd %f2,%f8,%f2 -L(249): - st %i3,[%fp-8] - faddd %f6,%f2,%f2 - ld [%fp-8],%f10 - cmp %i3,0 - bge L(250) - fitod %f10,%f4 - faddd %f4,%f8,%f4 -L(250): - fdivd %f2,%f4,%f2 - -ifdef(`PIC', -` ldd [%o7+L(C1)-L(pc)],%f4', -` sethi %hi(L(C1)),%o7 - ldd [%o7+%lo(L(C1))],%f4') - - fcmped %f2,%f4 - nop - fbge,a L(251) - fsubd %f2,%f4,%f2 - fdtoi %f2,%f2 - st %f2,[%fp-8] - b L(252) - ld [%fp-8],%i4 -L(251): - fdtoi %f2,%f2 - st %f2,[%fp-8] - ld [%fp-8],%i4 - sethi %hi(-2147483648),%g2 - xor %i4,%g2,%i4 -L(252): - wr %g0,%i4,%y - sra %i3,31,%g2 - and %i4,%g2,%g2 - andcc %g0,0,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,0,%g1 - add %g1,%g2,%i0 - rd %y,%g3 - subcc %i2,%g3,%o7 - subxcc %i1,%i0,%g0 
- be L(253) - cmp %o7,%i3 - - add %i4,-1,%i0 - add %o7,%i3,%o7 - st %o7,[%i5] - ret - restore -L(253): - blu L(246) - mov %i4,%i0 - add %i4,1,%i0 - sub %o7,%i3,%o7 -L(246): - st %o7,[%i5] - ret - restore -EPILOGUE(mpn_udiv_qrnnd) diff --git a/rts/gmp/mpn/sparc32/udiv_nfp.asm b/rts/gmp/mpn/sparc32/udiv_nfp.asm deleted file mode 100644 index ae19f4c6e9..0000000000 --- a/rts/gmp/mpn/sparc32/udiv_nfp.asm +++ /dev/null @@ -1,193 +0,0 @@ -dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h. -dnl This is for v7 CPUs without a floating-point unit. - -dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. 
- - -include(`../config.m4') - -C INPUT PARAMETERS -C rem_ptr o0 -C n1 o1 -C n0 o2 -C d o3 - -ASM_START() -PROLOGUE(mpn_udiv_qrnnd) - tst %o3 - bneg L(largedivisor) - mov 8,%g1 - - b L(p1) - addxcc %o2,%o2,%o2 - -L(plop): - bcc L(n1) - addxcc %o2,%o2,%o2 -L(p1): addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc L(n2) - addxcc %o2,%o2,%o2 -L(p2): addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc L(n3) - addxcc %o2,%o2,%o2 -L(p3): addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc L(n4) - addxcc %o2,%o2,%o2 -L(p4): addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne L(plop) - subcc %o1,%o3,%o4 - bcc L(n5) - addxcc %o2,%o2,%o2 -L(p5): st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -L(nlop): - bcc L(p1) - addxcc %o2,%o2,%o2 -L(n1): addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc L(p2) - addxcc %o2,%o2,%o2 -L(n2): addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc L(p3) - addxcc %o2,%o2,%o2 -L(n3): addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc L(p4) - addxcc %o2,%o2,%o2 -L(n4): addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne L(nlop) - subcc %o4,%o3,%o1 - bcc L(p5) - addxcc %o2,%o2,%o2 -L(n5): st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -L(largedivisor): - and %o2,1,%o5 C %o5 = n0 & 1 - - srl %o2,1,%o2 - sll %o1,31,%g2 - or %g2,%o2,%o2 C %o2 = lo(n1n0 >> 1) - srl %o1,1,%o1 C %o1 = hi(n1n0 >> 1) - - and %o3,1,%g2 - srl %o3,1,%g3 C %g3 = floor(d / 2) - add %g3,%g2,%g3 C %g3 = ceil(d / 2) - - b L(Lp1) - addxcc %o2,%o2,%o2 - -L(Lplop): - bcc L(Ln1) - addxcc %o2,%o2,%o2 -L(Lp1): addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc L(Ln2) - addxcc %o2,%o2,%o2 -L(Lp2): addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc L(Ln3) - addxcc %o2,%o2,%o2 -L(Lp3): addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc L(Ln4) - addxcc %o2,%o2,%o2 -L(Lp4): addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne L(Lplop) - subcc %o1,%g3,%o4 - bcc L(Ln5) - addxcc %o2,%o2,%o2 -L(Lp5): add %o1,%o1,%o1 C << 1 - tst %g2 - bne L(oddp) - add %o5,%o1,%o1 - st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -L(Lnlop): - bcc L(Lp1) - addxcc %o2,%o2,%o2 -L(Ln1): addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc L(Lp2) - 
addxcc %o2,%o2,%o2 -L(Ln2): addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc L(Lp3) - addxcc %o2,%o2,%o2 -L(Ln3): addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc L(Lp4) - addxcc %o2,%o2,%o2 -L(Ln4): addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne L(Lnlop) - subcc %o4,%g3,%o1 - bcc L(Lp5) - addxcc %o2,%o2,%o2 -L(Ln5): add %o4,%o4,%o4 C << 1 - tst %g2 - bne L(oddn) - add %o5,%o4,%o4 - st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -L(oddp): - xnor %g0,%o2,%o2 - C q' in %o2. r' in %o1 - addcc %o1,%o2,%o1 - bcc L(Lp6) - addx %o2,0,%o2 - sub %o1,%o3,%o1 -L(Lp6): subcc %o1,%o3,%g0 - bcs L(Lp7) - subx %o2,-1,%o2 - sub %o1,%o3,%o1 -L(Lp7): st %o1,[%o0] - retl - mov %o2,%o0 - -L(oddn): - xnor %g0,%o2,%o2 - C q' in %o2. r' in %o4 - addcc %o4,%o2,%o4 - bcc L(Ln6) - addx %o2,0,%o2 - sub %o4,%o3,%o4 -L(Ln6): subcc %o4,%o3,%g0 - bcs L(Ln7) - subx %o2,-1,%o2 - sub %o4,%o3,%o4 -L(Ln7): st %o4,[%o0] - retl - mov %o2,%o0 -EPILOGUE(mpn_udiv_qrnnd) diff --git a/rts/gmp/mpn/sparc32/umul.asm b/rts/gmp/mpn/sparc32/umul.asm deleted file mode 100644 index efa56851d6..0000000000 --- a/rts/gmp/mpn/sparc32/umul.asm +++ /dev/null @@ -1,68 +0,0 @@ -dnl SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc. - -dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -ASM_START() -PROLOGUE(mpn_umul_ppmm) - wr %g0,%o1,%y - sra %o2,31,%g2 C Don't move this insn - and %o1,%g2,%g2 C Don't move this insn - andcc %g0,0,%g1 C Don't move this insn - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,%o2,%g1 - mulscc %g1,0,%g1 - rd %y,%g3 - st %g3,[%o0] - retl - add %g1,%g2,%o0 -EPILOGUE(mpn_umul_ppmm) diff --git a/rts/gmp/mpn/sparc32/v8/addmul_1.asm b/rts/gmp/mpn/sparc32/v8/addmul_1.asm deleted file mode 100644 index da44644b51..0000000000 --- a/rts/gmp/mpn/sparc32/v8/addmul_1.asm +++ /dev/null @@ -1,122 +0,0 @@ -dnl SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and -dnl add the result to a second limb vector. - -dnl Copyright (C) 1992, 1993, 1994, 1995, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. 
- -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr o0 -C s1_ptr o1 -C size o2 -C s2_limb o3 - -ASM_START() -PROLOGUE(mpn_addmul_1) - orcc %g0,%g0,%g2 - ld [%o1+0],%o4 C 1 - - sll %o2,4,%g1 - and %g1,(4-1)<<4,%g1 -ifdef(`PIC', -` mov %o7,%g4 C Save return address register -0: call 1f - add %o7,L(1)-0b,%g3 -1: mov %g4,%o7 C Restore return address register -', -` sethi %hi(L(1)),%g3 - or %g3,%lo(L(1)),%g3 -') - jmp %g3+%g1 - nop -L(1): -L(L00): add %o0,-4,%o0 - b L(loop00) C 4, 8, 12, ... - add %o1,-4,%o1 - nop -L(L01): b L(loop01) C 1, 5, 9, ... - nop - nop - nop -L(L10): add %o0,-12,%o0 C 2, 6, 10, ... - b L(loop10) - add %o1,4,%o1 - nop -L(L11): add %o0,-8,%o0 C 3, 7, 11, ... 
- b L(loop11) - add %o1,-8,%o1 - nop - -L(loop): - addcc %g3,%g2,%g3 C 1 - ld [%o1+4],%o4 C 2 - rd %y,%g2 C 1 - addx %g0,%g2,%g2 - ld [%o0+0],%g1 C 2 - addcc %g1,%g3,%g3 - st %g3,[%o0+0] C 1 -L(loop00): - umul %o4,%o3,%g3 C 2 - ld [%o0+4],%g1 C 2 - addxcc %g3,%g2,%g3 C 2 - ld [%o1+8],%o4 C 3 - rd %y,%g2 C 2 - addx %g0,%g2,%g2 - nop - addcc %g1,%g3,%g3 - st %g3,[%o0+4] C 2 -L(loop11): - umul %o4,%o3,%g3 C 3 - addxcc %g3,%g2,%g3 C 3 - ld [%o1+12],%o4 C 4 - rd %y,%g2 C 3 - add %o1,16,%o1 - addx %g0,%g2,%g2 - ld [%o0+8],%g1 C 2 - addcc %g1,%g3,%g3 - st %g3,[%o0+8] C 3 -L(loop10): - umul %o4,%o3,%g3 C 4 - addxcc %g3,%g2,%g3 C 4 - ld [%o1+0],%o4 C 1 - rd %y,%g2 C 4 - addx %g0,%g2,%g2 - ld [%o0+12],%g1 C 2 - addcc %g1,%g3,%g3 - st %g3,[%o0+12] C 4 - add %o0,16,%o0 - addx %g0,%g2,%g2 -L(loop01): - addcc %o2,-4,%o2 - bg L(loop) - umul %o4,%o3,%g3 C 1 - - addcc %g3,%g2,%g3 C 4 - rd %y,%g2 C 4 - addx %g0,%g2,%g2 - ld [%o0+0],%g1 C 2 - addcc %g1,%g3,%g3 - st %g3,[%o0+0] C 4 - addx %g0,%g2,%o0 - - retl - nop -EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/sparc32/v8/mul_1.asm b/rts/gmp/mpn/sparc32/v8/mul_1.asm deleted file mode 100644 index 801247553a..0000000000 --- a/rts/gmp/mpn/sparc32/v8/mul_1.asm +++ /dev/null @@ -1,103 +0,0 @@ -dnl SPARC v8 mpn_mul_1 -- Multiply a limb vector with a single limb and -dnl store the product in a second limb vector. - -dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr o0 -C s1_ptr o1 -C size o2 -C s2_limb o3 - -ASM_START() -PROLOGUE(mpn_mul_1) - sll %o2,4,%g1 - and %g1,(4-1)<<4,%g1 -ifdef(`PIC', -` mov %o7,%g4 C Save return address register -0: call 1f - add %o7,L(1)-0b,%g3 -1: mov %g4,%o7 C Restore return address register -', -` sethi %hi(L(1)),%g3 - or %g3,%lo(L(1)),%g3 -') - jmp %g3+%g1 - ld [%o1+0],%o4 C 1 -L(1): -L(L00): add %o0,-4,%o0 - add %o1,-4,%o1 - b L(loop00) C 4, 8, 12, ... - orcc %g0,%g0,%g2 -L(L01): b L(loop01) C 1, 5, 9, ... - orcc %g0,%g0,%g2 - nop - nop -L(L10): add %o0,-12,%o0 C 2, 6, 10, ... - add %o1,4,%o1 - b L(loop10) - orcc %g0,%g0,%g2 - nop -L(L11): add %o0,-8,%o0 C 3, 7, 11, ... 
- add %o1,-8,%o1 - b L(loop11) - orcc %g0,%g0,%g2 - -L(loop): - addcc %g3,%g2,%g3 C 1 - ld [%o1+4],%o4 C 2 - st %g3,[%o0+0] C 1 - rd %y,%g2 C 1 -L(loop00): - umul %o4,%o3,%g3 C 2 - addxcc %g3,%g2,%g3 C 2 - ld [%o1+8],%o4 C 3 - st %g3,[%o0+4] C 2 - rd %y,%g2 C 2 -L(loop11): - umul %o4,%o3,%g3 C 3 - addxcc %g3,%g2,%g3 C 3 - ld [%o1+12],%o4 C 4 - add %o1,16,%o1 - st %g3,[%o0+8] C 3 - rd %y,%g2 C 3 -L(loop10): - umul %o4,%o3,%g3 C 4 - addxcc %g3,%g2,%g3 C 4 - ld [%o1+0],%o4 C 1 - st %g3,[%o0+12] C 4 - add %o0,16,%o0 - rd %y,%g2 C 4 - addx %g0,%g2,%g2 -L(loop01): - addcc %o2,-4,%o2 - bg L(loop) - umul %o4,%o3,%g3 C 1 - - addcc %g3,%g2,%g3 C 4 - st %g3,[%o0+0] C 4 - rd %y,%g2 C 4 - - retl - addx %g0,%g2,%o0 -EPILOGUE(mpn_mul_1) diff --git a/rts/gmp/mpn/sparc32/v8/submul_1.asm b/rts/gmp/mpn/sparc32/v8/submul_1.asm deleted file mode 100644 index 9ed132f4c1..0000000000 --- a/rts/gmp/mpn/sparc32/v8/submul_1.asm +++ /dev/null @@ -1,58 +0,0 @@ -dnl SPARC v8 mpn_submul_1 -- Multiply a limb vector with a limb and -dnl subtract the result from a second limb vector. - -dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. 
- - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr o0 -C s1_ptr o1 -C size o2 -C s2_limb o3 - -ASM_START() -PROLOGUE(mpn_submul_1) - sub %g0,%o2,%o2 C negate ... - sll %o2,2,%o2 C ... and scale size - sub %o1,%o2,%o1 C o1 is offset s1_ptr - sub %o0,%o2,%g1 C g1 is offset res_ptr - - mov 0,%o0 C clear cy_limb - -L(loop): - ld [%o1+%o2],%o4 - ld [%g1+%o2],%g2 - umul %o4,%o3,%o5 - rd %y,%g3 - addcc %o5,%o0,%o5 - addx %g3,0,%o0 - subcc %g2,%o5,%g2 - addx %o0,0,%o0 - st %g2,[%g1+%o2] - - addcc %o2,4,%o2 - bne L(loop) - nop - - retl - nop -EPILOGUE(mpn_submul_1) diff --git a/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm b/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm deleted file mode 100644 index 0d5e8d415d..0000000000 --- a/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm +++ /dev/null @@ -1,122 +0,0 @@ -dnl SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h. -dnl This is for SuperSPARC only, to compensate for its semi-functional -dnl udiv instruction. - -dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. 
- - -include(`../config.m4') - -C INPUT PARAMETERS -C rem_ptr i0 -C n1 i1 -C n0 i2 -C d i3 - -ASM_START() - -ifdef(`PIC', -` TEXT -L(getpc): - retl - nop') - - TEXT - ALIGN(8) -L(C0): .double 0r4294967296 -L(C1): .double 0r2147483648 - -PROLOGUE(mpn_udiv_qrnnd) - save %sp,-104,%sp - st %i1,[%fp-8] - ld [%fp-8],%f10 - -ifdef(`PIC', -`L(pc): call L(getpc) C put address of this insn in %o7 - ldd [%o7+L(C0)-L(pc)],%f8', -` sethi %hi(L(C0)),%o7 - ldd [%o7+%lo(L(C0))],%f8') - - fitod %f10,%f4 - cmp %i1,0 - bge L(248) - mov %i0,%i5 - faddd %f4,%f8,%f4 -L(248): - st %i2,[%fp-8] - ld [%fp-8],%f10 - fmuld %f4,%f8,%f6 - cmp %i2,0 - bge L(249) - fitod %f10,%f2 - faddd %f2,%f8,%f2 -L(249): - st %i3,[%fp-8] - faddd %f6,%f2,%f2 - ld [%fp-8],%f10 - cmp %i3,0 - bge L(250) - fitod %f10,%f4 - faddd %f4,%f8,%f4 -L(250): - fdivd %f2,%f4,%f2 - -ifdef(`PIC', -` ldd [%o7+L(C1)-L(pc)],%f4', -` sethi %hi(L(C1)),%o7 - ldd [%o7+%lo(L(C1))],%f4') - - fcmped %f2,%f4 - nop - fbge,a L(251) - fsubd %f2,%f4,%f2 - fdtoi %f2,%f2 - st %f2,[%fp-8] - b L(252) - ld [%fp-8],%i4 -L(251): - fdtoi %f2,%f2 - st %f2,[%fp-8] - ld [%fp-8],%i4 - sethi %hi(-2147483648),%g2 - xor %i4,%g2,%i4 -L(252): - umul %i3,%i4,%g3 - rd %y,%i0 - subcc %i2,%g3,%o7 - subxcc %i1,%i0,%g0 - be L(253) - cmp %o7,%i3 - - add %i4,-1,%i0 - add %o7,%i3,%o7 - st %o7,[%i5] - ret - restore -L(253): - blu L(246) - mov %i4,%i0 - add %i4,1,%i0 - sub %o7,%i3,%o7 -L(246): - st %o7,[%i5] - ret - restore -EPILOGUE(mpn_udiv_qrnnd) diff --git a/rts/gmp/mpn/sparc32/v8/umul.asm b/rts/gmp/mpn/sparc32/v8/umul.asm deleted file mode 100644 index ae8f692a0a..0000000000 --- a/rts/gmp/mpn/sparc32/v8/umul.asm +++ /dev/null @@ -1,31 +0,0 @@ -dnl SPARC v8 mpn_umul_ppmm -- support for longlong.h for non-gcc. - -dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
- -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -ASM_START() -PROLOGUE(mpn_umul_ppmm) - umul %o1,%o2,%g2 - st %g2,[%o0] - retl - rd %y,%o0 -EPILOGUE(mpn_umul_ppmm) diff --git a/rts/gmp/mpn/sparc32/v9/README b/rts/gmp/mpn/sparc32/v9/README deleted file mode 100644 index 9b39713271..0000000000 --- a/rts/gmp/mpn/sparc32/v9/README +++ /dev/null @@ -1,4 +0,0 @@ -Code for SPARC processors implementing version 9 of the SPARC architecture. -This code is for systems that doesn't preserve the full 64-bit contents of -integer register at context switch. For other systems (such as Solaris 7 or -later) use the code in ../../sparc64. diff --git a/rts/gmp/mpn/sparc32/v9/addmul_1.asm b/rts/gmp/mpn/sparc32/v9/addmul_1.asm deleted file mode 100644 index c1762cc41f..0000000000 --- a/rts/gmp/mpn/sparc32/v9/addmul_1.asm +++ /dev/null @@ -1,288 +0,0 @@ -dnl SPARC v9 32-bit mpn_addmul_1 -- Multiply a limb vector with a limb and -dnl add the result to a second limb vector. - -dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
- -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr i0 -C s1_ptr i1 -C size i2 -C s2_limb i3 - -ASM_START() - - TEXT - ALIGN(4) -L(noll): - .word 0 - -PROLOGUE(mpn_addmul_1) - save %sp,-256,%sp - -ifdef(`PIC', -`L(pc): rd %pc,%o7 - ld [%o7+L(noll)-L(pc)],%f10', -` sethi %hi(L(noll)),%g1 - ld [%g1+%lo(L(noll))],%f10') - - sethi %hi(0xffff0000),%o0 - andn %i3,%o0,%o0 - st %o0,[%fp-16] - ld [%fp-16],%f11 - fxtod %f10,%f6 - - srl %i3,16,%o0 - st %o0,[%fp-16] - ld [%fp-16],%f11 - fxtod %f10,%f8 - - mov 0,%g3 C cy = 0 - - ld [%i1],%f11 - subcc %i2,1,%i2 - be,pn %icc,L(end1) - add %i1,4,%i1 C s1_ptr++ - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-24] - fdtox %f4,%f12 - subcc %i2,1,%i2 - be,pn %icc,L(end2) - std %f12,[%fp-16] - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-40] - fdtox %f4,%f12 - subcc %i2,1,%i2 - be,pn %icc,L(end3) - std %f12,[%fp-32] - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 
- fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - add %i0,4,%i0 C res_ptr++ - subcc %i2,1,%i2 - be,pn %icc,L(end4) - std %f12,[%fp-16] - - b,a L(loopm) - - .align 16 -C BEGIN LOOP -L(loop): - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - subcc %i2,1,%i2 - be,pn %icc,L(loope) - add %i0,4,%i0 C res_ptr++ -L(loopm): - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-32],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - subcc %i2,1,%i2 - bne,pt %icc,L(loop) - add %i0,4,%i0 C res_ptr++ -C END LOOP - - fxtod %f10,%f2 - add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - b,a L(xxx) -L(loope): -L(end4): - fxtod %f10,%f2 - add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-32],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - add %i0,4,%i0 C res_ptr++ - - add 
%g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - b,a L(yyy) - -L(end3): - fxtod %f10,%f2 - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 -L(xxx): fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - add %i0,4,%i0 C res_ptr++ - - add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - ldx [%fp-32],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - - add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - b,a L(ret) - -L(end2): - fxtod %f10,%f2 - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 -L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - - add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - ldx [%fp-32],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - b,a L(ret) - -L(end1): - fxtod %f10,%f2 - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - 
-L(ret): add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - st %g4,[%i0-4] - - ret - restore %g0,%g3,%o0 C sideeffect: put cy in retreg -EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/sparc32/v9/gmp-mparam.h b/rts/gmp/mpn/sparc32/v9/gmp-mparam.h deleted file mode 100644 index f946b900f0..0000000000 --- a/rts/gmp/mpn/sparc32/v9/gmp-mparam.h +++ /dev/null @@ -1,69 +0,0 @@ -/* gmp-mparam.h -- Compiler/machine parameter header file. - -Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library; see the file COPYING.LIB. If not, write to -the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -MA 02111-1307, USA. */ - -#define BITS_PER_MP_LIMB 32 -#define BYTES_PER_MP_LIMB 4 -#define BITS_PER_LONGINT 32 -#define BITS_PER_INT 32 -#define BITS_PER_SHORTINT 16 -#define BITS_PER_CHAR 8 - - -/* These values are for UltraSPARC I, II, and IIi. It is bogus that - this file lives in v9, but that will do for now. */ - -/* Variations in addmul_1 speed make the multiply and square thresholds - doubtful. TOOM3_SQR_THRESHOLD had to be estimated here. */ - -/* Generated by tuneup.c, 2000-07-06. 
*/ - -#ifndef KARATSUBA_MUL_THRESHOLD -#define KARATSUBA_MUL_THRESHOLD 30 -#endif -#ifndef TOOM3_MUL_THRESHOLD -#define TOOM3_MUL_THRESHOLD 200 -#endif - -#ifndef KARATSUBA_SQR_THRESHOLD -#define KARATSUBA_SQR_THRESHOLD 59 -#endif -#ifndef TOOM3_SQR_THRESHOLD -#define TOOM3_SQR_THRESHOLD 500 -#endif - -#ifndef BZ_THRESHOLD -#define BZ_THRESHOLD 107 -#endif - -#ifndef FIB_THRESHOLD -#define FIB_THRESHOLD 146 -#endif - -#ifndef POWM_THRESHOLD -#define POWM_THRESHOLD 29 -#endif - -#ifndef GCD_ACCEL_THRESHOLD -#define GCD_ACCEL_THRESHOLD 4 -#endif -#ifndef GCDEXT_THRESHOLD -#define GCDEXT_THRESHOLD 3 -#endif diff --git a/rts/gmp/mpn/sparc32/v9/mul_1.asm b/rts/gmp/mpn/sparc32/v9/mul_1.asm deleted file mode 100644 index f8f0fdd8c2..0000000000 --- a/rts/gmp/mpn/sparc32/v9/mul_1.asm +++ /dev/null @@ -1,267 +0,0 @@ -dnl SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and -dnl store the result in a second limb vector. - -dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. 
- - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr i0 -C s1_ptr i1 -C size i2 -C s2_limb i3 - -ASM_START() - - TEXT - ALIGN(4) -L(noll): - .word 0 - -PROLOGUE(mpn_mul_1) - save %sp,-256,%sp - -ifdef(`PIC', -`L(pc): rd %pc,%o7 - ld [%o7+L(noll)-L(pc)],%f10', -` sethi %hi(L(noll)),%g1 - ld [%g1+%lo(L(noll))],%f10') - - sethi %hi(0xffff0000),%o0 - andn %i3,%o0,%o0 - st %o0,[%fp-16] - ld [%fp-16],%f11 - fxtod %f10,%f6 - - srl %i3,16,%o0 - st %o0,[%fp-16] - ld [%fp-16],%f11 - fxtod %f10,%f8 - - mov 0,%g3 C cy = 0 - - ld [%i1],%f11 - subcc %i2,1,%i2 - be,pn %icc,L(end1) - add %i1,4,%i1 C s1_ptr++ - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-24] - fdtox %f4,%f12 - subcc %i2,1,%i2 - be,pn %icc,L(end2) - std %f12,[%fp-16] - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-40] - fdtox %f4,%f12 - subcc %i2,1,%i2 - be,pn %icc,L(end3) - std %f12,[%fp-32] - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - add %i0,4,%i0 C res_ptr++ - subcc %i2,1,%i2 - be,pn %icc,L(end4) - std %f12,[%fp-16] - - b,a L(loopm) - - .align 16 -C BEGIN LOOP -L(loop): - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - subcc %i2,1,%i2 - be,pn %icc,L(loope) - add %i0,4,%i0 C res_ptr++ -L(loopm): - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - add %g3,%g1,%g4 C p += cy - srlx 
%g4,32,%g3 - ldx [%fp-40],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-32],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - subcc %i2,1,%i2 - bne,pt %icc,L(loop) - add %i0,4,%i0 C res_ptr++ -C END LOOP - - fxtod %f10,%f2 - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - b,a L(xxx) -L(loope): -L(end4): - fxtod %f10,%f2 - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-32],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - b,a L(yyy) - -L(end3): - fxtod %f10,%f2 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 -L(xxx): fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - ldx [%fp-32],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - b,a L(ret) - -L(end2): - fxtod %f10,%f2 - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - ldx [%fp-24],%g2 C p16 - ldx 
[%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 -L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - ldx [%fp-32],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %g4,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - b,a L(ret) - -L(end1): - fxtod %f10,%f2 - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - -L(ret): add %g3,%g1,%g4 C p += cy - srlx %g4,32,%g3 - st %g4,[%i0-4] - - ret - restore %g0,%g3,%o0 C sideeffect: put cy in retreg -EPILOGUE(mpn_mul_1) diff --git a/rts/gmp/mpn/sparc32/v9/submul_1.asm b/rts/gmp/mpn/sparc32/v9/submul_1.asm deleted file mode 100644 index 6195ea88ea..0000000000 --- a/rts/gmp/mpn/sparc32/v9/submul_1.asm +++ /dev/null @@ -1,291 +0,0 @@ -dnl SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and -dnl subtract the result from a second limb vector. - -dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 2.1 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to -dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -dnl MA 02111-1307, USA. - - -include(`../config.m4') - -C INPUT PARAMETERS -C res_ptr i0 -C s1_ptr i1 -C size i2 -C s2_limb i3 - -ASM_START() - - TEXT - ALIGN(4) -L(noll): - .word 0 - -PROLOGUE(mpn_submul_1) - save %sp,-256,%sp - -ifdef(`PIC', -`L(pc): rd %pc,%o7 - ld [%o7+L(noll)-L(pc)],%f10', -` sethi %hi(L(noll)),%g1 - ld [%g1+%lo(L(noll))],%f10') - - sethi %hi(0xffff0000),%o0 - andn %i3,%o0,%o0 - st %o0,[%fp-16] - ld [%fp-16],%f11 - fxtod %f10,%f6 - - srl %i3,16,%o0 - st %o0,[%fp-16] - ld [%fp-16],%f11 - fxtod %f10,%f8 - - mov 0,%g3 C cy = 0 - - ld [%i1],%f11 - subcc %i2,1,%i2 - be,pn %icc,L(end1) - add %i1,4,%i1 C s1_ptr++ - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-24] - fdtox %f4,%f12 - subcc %i2,1,%i2 - be,pn %icc,L(end2) - std %f12,[%fp-16] - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-40] - fdtox %f4,%f12 - subcc %i2,1,%i2 - be,pn %icc,L(end3) - std %f12,[%fp-32] - - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - add %i0,4,%i0 C res_ptr++ - subcc %i2,1,%i2 - be,pn %icc,L(end4) - std %f12,[%fp-16] - - b,a L(loopm) - - .align 16 -C BEGIN LOOP -L(loop): - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - add %g3,%g1,%g4 C p += cy - subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - addx %g3,0,%g3 - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std 
%f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - subcc %i2,1,%i2 - be,pn %icc,L(loope) - add %i0,4,%i0 C res_ptr++ -L(loopm): - fxtod %f10,%f2 - ld [%i1],%f11 - add %i1,4,%i1 C s1_ptr++ - add %g3,%g1,%g4 C p += cy - subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-32],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - addx %g3,0,%g3 - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - subcc %i2,1,%i2 - bne,pt %icc,L(loop) - add %i0,4,%i0 C res_ptr++ -C END LOOP - - fxtod %f10,%f2 - add %g3,%g1,%g4 C p += cy - subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - b,a L(xxx) -L(loope): -L(end4): - fxtod %f10,%f2 - add %g3,%g1,%g4 C p += cy - subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-32],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - b,a L(yyy) - -L(end3): - fxtod %f10,%f2 - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - fmuld %f2,%f8,%f16 - ldx [%fp-16],%g1 C p0 - fmuld %f2,%f6,%f4 - sllx %g2,16,%g2 C align p16 -L(xxx): fdtox %f16,%f14 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx 
%g4,32,%g3 - ldx [%fp-40],%g2 C p16 - ldx [%fp-32],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - b,a L(ret) - -L(end2): - fxtod %f10,%f2 - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-40] - fdtox %f4,%f12 - std %f12,[%fp-32] - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 -L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - - add %g3,%g1,%g4 C p += cy - subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - ld [%i0],%g5 - srlx %g4,32,%g3 - ldx [%fp-40],%g2 C p16 - ldx [%fp-32],%g1 C p0 - sllx %g2,16,%g2 C align p16 - st %l2,[%i0-4] - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - b,a L(ret) - -L(end1): - fxtod %f10,%f2 - fmuld %f2,%f8,%f16 - fmuld %f2,%f6,%f4 - fdtox %f16,%f14 - std %f14,[%fp-24] - fdtox %f4,%f12 - std %f12,[%fp-16] - - ld [%i0],%g5 - ldx [%fp-24],%g2 C p16 - ldx [%fp-16],%g1 C p0 - sllx %g2,16,%g2 C align p16 - add %g2,%g1,%g1 C add p16 to p0 (ADD1) - add %i0,4,%i0 C res_ptr++ - -L(ret): add %g3,%g1,%g4 C p += cy - subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) - srlx %g4,32,%g3 - st %l2,[%i0-4] - - addx %g3,%g0,%g3 - ret - restore %g0,%g3,%o0 C sideeffect: put cy in retreg -EPILOGUE(mpn_submul_1) |