diff options
Diffstat (limited to 'rts/gmp/mpn/alpha/ev5')
-rw-r--r-- | rts/gmp/mpn/alpha/ev5/add_n.asm | 143 | ||||
-rw-r--r-- | rts/gmp/mpn/alpha/ev5/lshift.asm | 169 | ||||
-rw-r--r-- | rts/gmp/mpn/alpha/ev5/rshift.asm | 167 | ||||
-rw-r--r-- | rts/gmp/mpn/alpha/ev5/sub_n.asm | 143 |
4 files changed, 622 insertions, 0 deletions
diff --git a/rts/gmp/mpn/alpha/ev5/add_n.asm b/rts/gmp/mpn/alpha/ev5/add_n.asm
new file mode 100644
index 0000000000..716d6404ae
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/add_n.asm
@@ -0,0 +1,143 @@
+dnl Alpha EV5 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	bis	r31,r31,r25	C clear cy
+	subq	r19,4,r19	C decr loop cnt
+	blt	r19,$Lend2	C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+	ldq	r0,0(r18)
+	ldq	r4,0(r17)
+	ldq	r1,8(r18)
+	ldq	r5,8(r17)
+	addq	r17,32,r17	C update s1_ptr
+	ldq	r2,16(r18)
+	addq	r0,r4,r20	C 1st main add
+	ldq	r3,24(r18)
+	subq	r19,4,r19	C decr loop cnt
+	ldq	r6,-16(r17)
+	cmpult	r20,r0,r25	C compute cy from last add
+	ldq	r7,-8(r17)
+	addq	r1,r5,r28	C 2nd main add
+	addq	r18,32,r18	C update s2_ptr
+	addq	r28,r25,r21	C 2nd carry add
+	cmpult	r28,r5,r8	C compute cy from last add
+	blt	r19,$Lend1	C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+	ALIGN(16)
+$Loop:	cmpult	r21,r28,r25	C compute cy from last add
+	ldq	r0,0(r18)
+	bis	r8,r25,r25	C combine cy from the two adds
+	ldq	r1,8(r18)
+	addq	r2,r6,r28	C 3rd main add
+	ldq	r4,0(r17)
+	addq	r28,r25,r22	C 3rd carry add
+	ldq	r5,8(r17)
+	cmpult	r28,r6,r8	C compute cy from last add
+	cmpult	r22,r28,r25	C compute cy from last add
+	stq	r20,0(r16)
+	bis	r8,r25,r25	C combine cy from the two adds
+	stq	r21,8(r16)
+	addq	r3,r7,r28	C 4th main add
+	addq	r28,r25,r23	C 4th carry add
+	cmpult	r28,r7,r8	C compute cy from last add
+	cmpult	r23,r28,r25	C compute cy from last add
+	addq	r17,32,r17	C update s1_ptr
+	bis	r8,r25,r25	C combine cy from the two adds
+	addq	r16,32,r16	C update res_ptr
+	addq	r0,r4,r28	C 1st main add
+	ldq	r2,16(r18)
+	addq	r25,r28,r20	C 1st carry add
+	ldq	r3,24(r18)
+	cmpult	r28,r4,r8	C compute cy from last add
+	ldq	r6,-16(r17)
+	cmpult	r20,r28,r25	C compute cy from last add
+	ldq	r7,-8(r17)
+	bis	r8,r25,r25	C combine cy from the two adds
+	subq	r19,4,r19	C decr loop cnt
+	stq	r22,-16(r16)
+	addq	r1,r5,r28	C 2nd main add
+	stq	r23,-8(r16)
+	addq	r25,r28,r21	C 2nd carry add
+	addq	r18,32,r18	C update s2_ptr
+	cmpult	r28,r5,r8	C compute cy from last add
+	bge	r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1:	cmpult	r21,r28,r25	C compute cy from last add
+	bis	r8,r25,r25	C combine cy from the two adds
+	addq	r2,r6,r28	C 3rd main add
+	addq	r28,r25,r22	C 3rd carry add
+	cmpult	r28,r6,r8	C compute cy from last add
+	cmpult	r22,r28,r25	C compute cy from last add
+	stq	r20,0(r16)
+	bis	r8,r25,r25	C combine cy from the two adds
+	stq	r21,8(r16)
+	addq	r3,r7,r28	C 4th main add
+	addq	r28,r25,r23	C 4th carry add
+	cmpult	r28,r7,r8	C compute cy from last add
+	cmpult	r23,r28,r25	C compute cy from last add
+	bis	r8,r25,r25	C combine cy from the two adds
+	addq	r16,32,r16	C update res_ptr
+	stq	r22,-16(r16)
+	stq	r23,-8(r16)
+$Lend2:	addq	r19,4,r19	C restore loop cnt
+	beq	r19,$Lret	C no limbs left, just return cy
+C Start software pipeline for 2nd loop
+	ldq	r0,0(r18)	C load s2 limb
+	ldq	r4,0(r17)	C load s1 limb
+	subq	r19,1,r19	C decr loop cnt
+	beq	r19,$Lend0	C one limb left, skip the loop
+C 2nd loop handles remaining 1-3 limbs
+	ALIGN(16)
+$Loop0:	addq	r0,r4,r28	C main add
+	ldq	r0,8(r18)	C load next s2 limb
+	cmpult	r28,r4,r8	C compute cy from last add
+	ldq	r4,8(r17)	C load next s1 limb
+	addq	r28,r25,r20	C carry add
+	addq	r18,8,r18	C update s2_ptr
+	addq	r17,8,r17	C update s1_ptr
+	stq	r20,0(r16)	C store sum limb
+	cmpult	r20,r28,r25	C compute cy from last add
+	subq	r19,1,r19	C decr loop cnt
+	bis	r8,r25,r25	C combine cy from the two adds
+	addq	r16,8,r16	C update res_ptr
+	bne	r19,$Loop0
+$Lend0:	addq	r0,r4,r28	C main add
+	addq	r28,r25,r20	C carry add
+	cmpult	r28,r4,r8	C compute cy from last add
+	cmpult	r20,r28,r25	C compute cy from last add
+	stq	r20,0(r16)	C store last sum limb
+	bis	r8,r25,r25	C combine cy from the two adds
+
+$Lret:	bis	r25,r31,r0	C return cy
+	ret	r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/lshift.asm b/rts/gmp/mpn/alpha/ev5/lshift.asm
new file mode 100644
index 0000000000..cb181dda66
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/lshift.asm
@@ -0,0 +1,169 @@
+dnl Alpha EV5 __gmpn_lshift -- Shift a number left.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	s8addq	r18,r17,r17	C make r17 point at end of s1
+	ldq	r4,-8(r17)	C load first limb
+	subq	r31,r19,r20	C r20 = -cnt, count for the opposite-direction shifts
+	s8addq	r18,r16,r16	C make r16 point at end of RES
+	subq	r18,1,r18	C r18 = size - 1
+	and	r18,4-1,r28	C number of limbs in first loop
+	srl	r4,r20,r0	C compute function result
+
+	beq	r28,$L0	C first loop not needed
+	subq	r18,r28,r18	C limbs left after the first loop
+
+	ALIGN(8)
+$Loop0:	ldq	r3,-16(r17)	C load next limb (lower address)
+	subq	r16,8,r16	C step res_ptr down
+	sll	r4,r19,r5	C part from current limb
+	subq	r17,8,r17	C step s1_ptr down
+	subq	r28,1,r28	C decr first-loop cnt
+	srl	r3,r20,r6	C part from next limb
+	bis	r3,r3,r4	C next limb becomes current limb
+	bis	r5,r6,r8	C combine the two parts
+	stq	r8,0(r16)	C store result limb
+	bne	r28,$Loop0
+
+$L0:	sll	r4,r19,r24	C shifted current limb, combined or stored later
+	beq	r18,$Lend	C no limbs left, store r24 and return
+C warm up phase 1
+	ldq	r1,-16(r17)
+	subq	r18,4,r18
+	ldq	r2,-24(r17)
+	ldq	r3,-32(r17)
+	ldq	r4,-40(r17)
+	beq	r18,$Lend1
+C warm up phase 2
+	srl	r1,r20,r7
+	sll	r1,r19,r21
+	srl	r2,r20,r8
+	ldq	r1,-48(r17)
+	sll	r2,r19,r22
+	ldq	r2,-56(r17)
+	srl	r3,r20,r5
+	bis	r7,r24,r7
+	sll	r3,r19,r23
+	bis	r8,r21,r8
+	srl	r4,r20,r6
+	ldq	r3,-64(r17)
+	sll	r4,r19,r24
+	ldq	r4,-72(r17)
+	subq	r18,4,r18
+	beq	r18,$Lend2
+	ALIGN(16)
+C main loop
+$Loop:	stq	r7,-8(r16)
+	bis	r5,r22,r5
+	stq	r8,-16(r16)
+	bis	r6,r23,r6
+
+	srl	r1,r20,r7
+	subq	r18,4,r18
+	sll	r1,r19,r21
+	unop	C ldq r31,-96(r17)
+
+	srl	r2,r20,r8
+	ldq	r1,-80(r17)
+	sll	r2,r19,r22
+	ldq	r2,-88(r17)
+
+	stq	r5,-24(r16)
+	bis	r7,r24,r7
+	stq	r6,-32(r16)
+	bis	r8,r21,r8
+
+	srl	r3,r20,r5
+	unop	C ldq r31,-96(r17)
+	sll	r3,r19,r23
+	subq	r16,32,r16	C step res_ptr down by 4 limbs
+
+	srl	r4,r20,r6
+	ldq	r3,-96(r17)
+	sll	r4,r19,r24
+	ldq	r4,-104(r17)
+
+	subq	r17,32,r17	C step s1_ptr down by 4 limbs
+	bne	r18,$Loop
+C cool down phase 2/1
+$Lend2:	stq	r7,-8(r16)
+	bis	r5,r22,r5
+	stq	r8,-16(r16)
+	bis	r6,r23,r6
+	srl	r1,r20,r7
+	sll	r1,r19,r21
+	srl	r2,r20,r8
+	sll	r2,r19,r22
+	stq	r5,-24(r16)
+	bis	r7,r24,r7
+	stq	r6,-32(r16)
+	bis	r8,r21,r8
+	srl	r3,r20,r5
+	sll	r3,r19,r23
+	srl	r4,r20,r6
+	sll	r4,r19,r24
+C cool down phase 2/2
+	stq	r7,-40(r16)
+	bis	r5,r22,r5
+	stq	r8,-48(r16)
+	bis	r6,r23,r6
+	stq	r5,-56(r16)
+	stq	r6,-64(r16)
+C cool down phase 2/3
+	stq	r24,-72(r16)
+	ret	r31,(r26),1
+
+C cool down phase 1/1
+$Lend1:	srl	r1,r20,r7
+	sll	r1,r19,r21
+	srl	r2,r20,r8
+	sll	r2,r19,r22
+	srl	r3,r20,r5
+	bis	r7,r24,r7
+	sll	r3,r19,r23
+	bis	r8,r21,r8
+	srl	r4,r20,r6
+	sll	r4,r19,r24
+C cool down phase 1/2
+	stq	r7,-8(r16)
+	bis	r5,r22,r5
+	stq	r8,-16(r16)
+	bis	r6,r23,r6
+	stq	r5,-24(r16)
+	stq	r6,-32(r16)
+	stq	r24,-40(r16)
+	ret	r31,(r26),1
+
+$Lend:	stq	r24,-8(r16)	C store the final result limb
+	ret	r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/rshift.asm b/rts/gmp/mpn/alpha/ev5/rshift.asm
new file mode 100644
index 0000000000..9940d83fad
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/rshift.asm
@@ -0,0 +1,167 @@
+dnl Alpha EV5 __gmpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	ldq	r4,0(r17)	C load first limb
+	subq	r31,r19,r20	C r20 = -cnt, count for the opposite-direction shifts
+	subq	r18,1,r18	C r18 = size - 1
+	and	r18,4-1,r28	C number of limbs in first loop
+	sll	r4,r20,r0	C compute function result
+
+	beq	r28,$L0	C first loop not needed
+	subq	r18,r28,r18	C limbs left after the first loop
+
+	ALIGN(8)
+$Loop0:	ldq	r3,8(r17)	C load next limb (higher address)
+	addq	r16,8,r16	C step res_ptr up
+	srl	r4,r19,r5	C part from current limb
+	addq	r17,8,r17	C step s1_ptr up
+	subq	r28,1,r28	C decr first-loop cnt
+	sll	r3,r20,r6	C part from next limb
+	bis	r3,r3,r4	C next limb becomes current limb
+	bis	r5,r6,r8	C combine the two parts
+	stq	r8,-8(r16)	C store result limb
+	bne	r28,$Loop0
+
+$L0:	srl	r4,r19,r24	C shifted current limb, combined or stored later
+	beq	r18,$Lend	C no limbs left, store r24 and return
+C warm up phase 1
+	ldq	r1,8(r17)
+	subq	r18,4,r18
+	ldq	r2,16(r17)
+	ldq	r3,24(r17)
+	ldq	r4,32(r17)
+	beq	r18,$Lend1
+C warm up phase 2
+	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	ldq	r1,40(r17)
+	srl	r2,r19,r22
+	ldq	r2,48(r17)
+	sll	r3,r20,r5
+	bis	r7,r24,r7
+	srl	r3,r19,r23
+	bis	r8,r21,r8
+	sll	r4,r20,r6
+	ldq	r3,56(r17)
+	srl	r4,r19,r24
+	ldq	r4,64(r17)
+	subq	r18,4,r18
+	beq	r18,$Lend2
+	ALIGN(16)
+C main loop
+$Loop:	stq	r7,0(r16)
+	bis	r5,r22,r5
+	stq	r8,8(r16)
+	bis	r6,r23,r6
+
+	sll	r1,r20,r7
+	subq	r18,4,r18
+	srl	r1,r19,r21
+	unop	C ldq r31,-96(r17) -- NOTE(review): negative offset looks copied from lshift; loads in this loop use positive offsets -- confirm
+
+	sll	r2,r20,r8
+	ldq	r1,72(r17)
+	srl	r2,r19,r22
+	ldq	r2,80(r17)
+
+	stq	r5,16(r16)
+	bis	r7,r24,r7
+	stq	r6,24(r16)
+	bis	r8,r21,r8
+
+	sll	r3,r20,r5
+	unop	C ldq r31,-96(r17) -- NOTE(review): same stale-looking offset as above -- confirm
+	srl	r3,r19,r23
+	addq	r16,32,r16	C step res_ptr up by 4 limbs
+
+	sll	r4,r20,r6
+	ldq	r3,88(r17)
+	srl	r4,r19,r24
+	ldq	r4,96(r17)
+
+	addq	r17,32,r17	C step s1_ptr up by 4 limbs
+	bne	r18,$Loop
+C cool down phase 2/1
+$Lend2:	stq	r7,0(r16)
+	bis	r5,r22,r5
+	stq	r8,8(r16)
+	bis	r6,r23,r6
+	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	srl	r2,r19,r22
+	stq	r5,16(r16)
+	bis	r7,r24,r7
+	stq	r6,24(r16)
+	bis	r8,r21,r8
+	sll	r3,r20,r5
+	srl	r3,r19,r23
+	sll	r4,r20,r6
+	srl	r4,r19,r24
+C cool down phase 2/2
+	stq	r7,32(r16)
+	bis	r5,r22,r5
+	stq	r8,40(r16)
+	bis	r6,r23,r6
+	stq	r5,48(r16)
+	stq	r6,56(r16)
+C cool down phase 2/3
+	stq	r24,64(r16)
+	ret	r31,(r26),1
+
+C cool down phase 1/1
+$Lend1:	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	srl	r2,r19,r22
+	sll	r3,r20,r5
+	bis	r7,r24,r7
+	srl	r3,r19,r23
+	bis	r8,r21,r8
+	sll	r4,r20,r6
+	srl	r4,r19,r24
+C cool down phase 1/2
+	stq	r7,0(r16)
+	bis	r5,r22,r5
+	stq	r8,8(r16)
+	bis	r6,r23,r6
+	stq	r5,16(r16)
+	stq	r6,24(r16)
+	stq	r24,32(r16)
+	ret	r31,(r26),1
+
+$Lend:	stq	r24,0(r16)	C store the final result limb
+	ret	r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/sub_n.asm b/rts/gmp/mpn/alpha/ev5/sub_n.asm
new file mode 100644
index 0000000000..5248a2aa38
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/sub_n.asm
@@ -0,0 +1,143 @@
+dnl Alpha EV5 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl and store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	bis	r31,r31,r25	C clear cy
+	subq	r19,4,r19	C decr loop cnt
+	blt	r19,$Lend2	C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+	ldq	r0,0(r18)
+	ldq	r4,0(r17)
+	ldq	r1,8(r18)
+	ldq	r5,8(r17)
+	addq	r17,32,r17	C update s1_ptr
+	ldq	r2,16(r18)
+	subq	r4,r0,r20	C 1st main subtract
+	ldq	r3,24(r18)
+	subq	r19,4,r19	C decr loop cnt
+	ldq	r6,-16(r17)
+	cmpult	r4,r0,r25	C compute cy from last subtract
+	ldq	r7,-8(r17)
+	subq	r5,r1,r28	C 2nd main subtract
+	addq	r18,32,r18	C update s2_ptr
+	subq	r28,r25,r21	C 2nd carry subtract
+	cmpult	r5,r1,r8	C compute cy from last subtract
+	blt	r19,$Lend1	C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+	ALIGN(16)
+$Loop:	cmpult	r28,r25,r25	C compute cy from last subtract
+	ldq	r0,0(r18)
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	ldq	r1,8(r18)
+	subq	r6,r2,r28	C 3rd main subtract
+	ldq	r4,0(r17)
+	subq	r28,r25,r22	C 3rd carry subtract
+	ldq	r5,8(r17)
+	cmpult	r6,r2,r8	C compute cy from last subtract
+	cmpult	r28,r25,r25	C compute cy from last subtract
+	stq	r20,0(r16)
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	stq	r21,8(r16)
+	subq	r7,r3,r28	C 4th main subtract
+	subq	r28,r25,r23	C 4th carry subtract
+	cmpult	r7,r3,r8	C compute cy from last subtract
+	cmpult	r28,r25,r25	C compute cy from last subtract
+	addq	r17,32,r17	C update s1_ptr
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	addq	r16,32,r16	C update res_ptr
+	subq	r4,r0,r28	C 1st main subtract
+	ldq	r2,16(r18)
+	subq	r28,r25,r20	C 1st carry subtract
+	ldq	r3,24(r18)
+	cmpult	r4,r0,r8	C compute cy from last subtract
+	ldq	r6,-16(r17)
+	cmpult	r28,r25,r25	C compute cy from last subtract
+	ldq	r7,-8(r17)
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	subq	r19,4,r19	C decr loop cnt
+	stq	r22,-16(r16)
+	subq	r5,r1,r28	C 2nd main subtract
+	stq	r23,-8(r16)
+	subq	r28,r25,r21	C 2nd carry subtract
+	addq	r18,32,r18	C update s2_ptr
+	cmpult	r5,r1,r8	C compute cy from last subtract
+	bge	r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1:	cmpult	r28,r25,r25	C compute cy from last subtract
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	subq	r6,r2,r28	C 3rd main subtract
+	subq	r28,r25,r22	C 3rd carry subtract
+	cmpult	r6,r2,r8	C compute cy from last subtract
+	cmpult	r28,r25,r25	C compute cy from last subtract
+	stq	r20,0(r16)
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	stq	r21,8(r16)
+	subq	r7,r3,r28	C 4th main subtract
+	subq	r28,r25,r23	C 4th carry subtract
+	cmpult	r7,r3,r8	C compute cy from last subtract
+	cmpult	r28,r25,r25	C compute cy from last subtract
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	addq	r16,32,r16	C update res_ptr
+	stq	r22,-16(r16)
+	stq	r23,-8(r16)
+$Lend2:	addq	r19,4,r19	C restore loop cnt
+	beq	r19,$Lret	C no limbs left, just return cy
+C Start software pipeline for 2nd loop
+	ldq	r0,0(r18)	C load s2 limb
+	ldq	r4,0(r17)	C load s1 limb
+	subq	r19,1,r19	C decr loop cnt
+	beq	r19,$Lend0	C one limb left, skip the loop
+C 2nd loop handles remaining 1-3 limbs
+	ALIGN(16)
+$Loop0:	subq	r4,r0,r28	C main subtract
+	cmpult	r4,r0,r8	C compute cy from last subtract
+	ldq	r0,8(r18)	C load next s2 limb
+	ldq	r4,8(r17)	C load next s1 limb
+	subq	r28,r25,r20	C carry subtract
+	addq	r18,8,r18	C update s2_ptr
+	addq	r17,8,r17	C update s1_ptr
+	stq	r20,0(r16)	C store difference limb
+	cmpult	r28,r25,r25	C compute cy from last subtract
+	subq	r19,1,r19	C decr loop cnt
+	bis	r8,r25,r25	C combine cy from the two subtracts
+	addq	r16,8,r16	C update res_ptr
+	bne	r19,$Loop0
+$Lend0:	subq	r4,r0,r28	C main subtract
+	subq	r28,r25,r20	C carry subtract
+	cmpult	r4,r0,r8	C compute cy from last subtract
+	cmpult	r28,r25,r25	C compute cy from last subtract
+	stq	r20,0(r16)	C store last difference limb
+	bis	r8,r25,r25	C combine cy from the two subtracts
+
+$Lret:	bis	r25,r31,r0	C return cy
+	ret	r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END() |