diff options
author | tege <tege@gmplib.org> | 2001-02-10 01:46:50 +0100 |
---|---|---|
committer | tege <tege@gmplib.org> | 2001-02-10 01:46:50 +0100 |
commit | f1f0a217a06cbdd9b541c8d84297aaa840a6ad93 (patch) | |
tree | 1833943d05f7316c17c6acf95019b43d0808fb41 /mpn/power | |
parent | ab64a1b0078eba23d2d9d44026dd05540e17db71 (diff) | |
download | gmp-f1f0a217a06cbdd9b541c8d84297aaa840a6ad93.tar.gz |
Convert files to `.asm'.
Prefix umul_ppmm and sdiv_qrnnd.
Update some comments.
Diffstat (limited to 'mpn/power')
-rw-r--r-- | mpn/power/add_n.asm | 75 | ||||
-rw-r--r-- | mpn/power/add_n.s | 79 | ||||
-rw-r--r-- | mpn/power/addmul_1.asm | 117 | ||||
-rw-r--r-- | mpn/power/addmul_1.s | 122 | ||||
-rw-r--r-- | mpn/power/lshift.asm | 52 | ||||
-rw-r--r-- | mpn/power/lshift.s | 56 | ||||
-rw-r--r-- | mpn/power/mul_1.asm | 104 | ||||
-rw-r--r-- | mpn/power/mul_1.s | 109 | ||||
-rw-r--r-- | mpn/power/rshift.asm | 50 | ||||
-rw-r--r-- | mpn/power/rshift.s | 54 | ||||
-rw-r--r-- | mpn/power/sdiv.asm | 30 | ||||
-rw-r--r-- | mpn/power/sdiv.s | 34 | ||||
-rw-r--r-- | mpn/power/sub_n.asm | 77 | ||||
-rw-r--r-- | mpn/power/sub_n.s | 80 | ||||
-rw-r--r-- | mpn/power/submul_1.asm | 122 | ||||
-rw-r--r-- | mpn/power/submul_1.s | 127 | ||||
-rw-r--r-- | mpn/power/umul.asm | 34 | ||||
-rw-r--r-- | mpn/power/umul.s | 38 |
18 files changed, 661 insertions, 699 deletions
diff --git a/mpn/power/add_n.asm b/mpn/power/add_n.asm new file mode 100644 index 000000000..ef0d53080 --- /dev/null +++ b/mpn/power/add_n.asm @@ -0,0 +1,75 @@ +dnl IBM POWER mpn_add_n -- Add two limb vectors of equal, non-zero length. + +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software +dnl Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl s2_ptr r5 +dnl size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_add_n) + andil. 10,6,1 C odd or even number of limbs? + l 8,0(4) C load least significant s1 limb + l 0,0(5) C load least significant s2 limb + cal 3,-4(3) C offset res_ptr, it's updated before it's used + sri 10,6,1 C count for unrolled loop + a 7,0,8 C add least significant limbs, set cy + mtctr 10 C copy count into CTR + beq 0,Leven C branch if even number of limbs (number of limbs >= 2) + +C We have an odd number of limbs. Add the first limbs separately. + cmpi 1,10,0 C is count for unrolled loop zero? + bc 4,6,L1 C bne cr1,L1 (misassembled by gas) + st 7,4(3) + aze 3,10 C use the fact that r10 is zero... + br C return + +C We added least significant limbs.
Now reload the next limbs to enter loop. +L1: lu 8,4(4) C load s1 limb and update s1_ptr + lu 0,4(5) C load s2 limb and update s2_ptr + stu 7,4(3) + ae 7,0,8 C add limbs, set cy +Leven: lu 9,4(4) C load s1 limb and update s1_ptr + lu 10,4(5) C load s2 limb and update s2_ptr + bdz Lend C If done, skip loop + +Loop: lu 8,4(4) C load s1 limb and update s1_ptr + lu 0,4(5) C load s2 limb and update s2_ptr + ae 11,10,9 C add previous limbs with cy, set cy + stu 7,4(3) C + lu 9,4(4) C load s1 limb and update s1_ptr + lu 10,4(5) C load s2 limb and update s2_ptr + ae 7,0,8 C add previous limbs with cy, set cy + stu 11,4(3) C + bdn Loop C decrement CTR and loop back + +Lend: ae 11,10,9 C add limbs with cy, set cy + st 7,4(3) C + st 11,8(3) C + lil 3,0 C load cy into ... + aze 3,3 C ... return value register + br +EPILOGUE(mpn_add_n) diff --git a/mpn/power/add_n.s b/mpn/power/add_n.s deleted file mode 100644 index 68d10e4d0..000000000 --- a/mpn/power/add_n.s +++ /dev/null @@ -1,79 +0,0 @@ -# IBM POWER __gmpn_add_n -- Add two limb vectors of equal, non-zero length. - -# Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation, -# Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. 
- - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# s2_ptr r5 -# size r6 - - .toc - .globl __gmpn_add_n - .globl .__gmpn_add_n - .csect __gmpn_add_n[DS] -__gmpn_add_n: - .long .__gmpn_add_n, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__gmpn_add_n: - andil. 10,6,1 # odd or even number of limbs? - l 8,0(4) # load least significant s1 limb - l 0,0(5) # load least significant s2 limb - cal 3,-4(3) # offset res_ptr, it's updated before it's used - sri 10,6,1 # count for unrolled loop - a 7,0,8 # add least significant limbs, set cy - mtctr 10 # copy count into CTR - beq 0,Leven # branch if even # of limbs (# of limbs >= 2) - -# We have an odd # of limbs. Add the first limbs separately. - cmpi 1,10,0 # is count for unrolled loop zero? - bc 4,6,L1 # bne cr1,L1 (misassembled by gas) - st 7,4(3) - aze 3,10 # use the fact that r10 is zero... - br # return - -# We added least significant limbs. Now reload the next limbs to enter loop. -L1: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - stu 7,4(3) - ae 7,0,8 # add limbs, set cy -Leven: lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - bdz Lend # If done, skip loop - -Loop: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - ae 11,9,10 # add previous limbs with cy, set cy - stu 7,4(3) # - lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - ae 7,0,8 # add previous limbs with cy, set cy - stu 11,4(3) # - bdn Loop # decrement CTR and loop back - -Lend: ae 11,9,10 # add limbs with cy, set cy - st 7,4(3) # - st 11,8(3) # - lil 3,0 # load cy into ... - aze 3,3 # ... return value register - br diff --git a/mpn/power/addmul_1.asm b/mpn/power/addmul_1.asm new file mode 100644 index 000000000..1e1e358c3 --- /dev/null +++ b/mpn/power/addmul_1.asm @@ -0,0 +1,117 @@ +dnl IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the +dnl result to a second limb vector. 
+ +dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl The POWER architecture has no unsigned 32x32->64 bit multiplication +dnl instruction. To obtain that operation, we have to use the 32x32->64 +dnl signed multiplication instruction, and add the appropriate compensation to +dnl the high limb of the result. We add the multiplicand if the multiplier +dnl has its most significant bit set, and we add the multiplier if the +dnl multiplicand has its most significant bit set. We need to preserve the +dnl carry flag between each iteration, so we have to compute the compensation +dnl carefully (the natural, srai+and doesn't work). Since all POWER can +dnl branch in zero cycles, we use conditional branches for the additions.
+ +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_addmul_1) + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + cax 9,9,7 + l 7,4(3) + a 8,8,7 C add res_limb + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 C low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 C propagate cy to new cy_limb + a 8,8,7 C add res_limb + bge Lp0 + cax 10,10,6 C adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + l 7,4(3) + aze 9,9 + a 8,8,7 + bge Lp1 + cax 9,9,6 C adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 8,7,9 + l 7,4(3) + ae 10,10,0 C propagate cy to new cy_limb + a 8,8,7 C add res_limb + bge Ln0 + cax 10,10,6 C adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 8,7,10 + l 7,4(3) + ae 9,9,0 C propagate cy to new cy_limb + a 8,8,7 C add res_limb + bge Ln1 + cax 9,9,6 C adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br +EPILOGUE(mpn_addmul_1) diff --git a/mpn/power/addmul_1.s b/mpn/power/addmul_1.s deleted file mode 100644 index aefbedc24..000000000 --- a/mpn/power/addmul_1.s +++ /dev/null @@ -1,122 +0,0 @@ -# IBM POWER __gmpn_addmul_1 -- Multiply a limb vector with a limb and add -# the result to a second limb vector. - -# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. 
- -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# size r5 -# s2_limb r6 - -# The POWER architecture has no unsigned 32x32->64 bit multiplication -# instruction. To obtain that operation, we have to use the 32x32->64 signed -# multiplication instruction, and add the appropriate compensation to the high -# limb of the result. We add the multiplicand if the multiplier has its most -# significant bit set, and we add the multiplier if the multiplicand has its -# most significant bit set. We need to preserve the carry flag between each -# iteration, so we have to compute the compensation carefully (the natural, -# srai+and doesn't work). Since the POWER architecture has a branch unit we -# can branch in zero cycles, so that's how we perform the additions. 
- - .toc - .globl __gmpn_addmul_1 - .globl .__gmpn_addmul_1 - .csect __gmpn_addmul_1[DS] -__gmpn_addmul_1: - .long .__gmpn_addmul_1, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__gmpn_addmul_1: - - cal 3,-4(3) - l 0,0(4) - cmpi 0,6,0 - mtctr 5 - mul 9,0,6 - srai 7,0,31 - and 7,7,6 - mfmq 8 - cax 9,9,7 - l 7,4(3) - a 8,8,7 # add res_limb - blt Lneg -Lpos: bdz Lend - -Lploop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 0 - ae 8,0,9 # low limb + old_cy_limb + old cy - l 7,4(3) - aze 10,10 # propagate cy to new cy_limb - a 8,8,7 # add res_limb - bge Lp0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Lp0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 0 - ae 8,0,10 - l 7,4(3) - aze 9,9 - a 8,8,7 - bge Lp1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Lp1: bdn Lploop - - b Lend - -Lneg: cax 9,9,0 - bdz Lend -Lnloop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 7 - ae 8,7,9 - l 7,4(3) - ae 10,10,0 # propagate cy to new cy_limb - a 8,8,7 # add res_limb - bge Ln0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Ln0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 7 - ae 8,7,10 - l 7,4(3) - ae 9,9,0 # propagate cy to new cy_limb - a 8,8,7 # add res_limb - bge Ln1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Ln1: bdn Lnloop - b Lend - -Lend0: cal 9,0(10) -Lend: st 8,4(3) - aze 3,9 - br diff --git a/mpn/power/lshift.asm b/mpn/power/lshift.asm new file mode 100644 index 000000000..c5358ff0f --- /dev/null +++ b/mpn/power/lshift.asm @@ -0,0 +1,52 @@ +dnl IBM POWER mpn_lshift -- Shift a number left. + +dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s_ptr r4 +dnl size r5 +dnl cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_lshift) + sli 0,5,2 + cax 9,3,0 + cax 4,4,0 + sfi 8,6,32 + mtctr 5 C put limb count in CTR loop register + lu 0,-4(4) C read most significant limb + sre 3,0,8 C compute carry out limb, and init MQ register + bdz Lend2 C if just one limb, skip loop + lu 0,-4(4) C read 2:nd most significant limb + sreq 7,0,8 C compute most significant limb of result + bdz Lend C if just two limb, skip loop +Loop: lu 0,-4(4) C load next lower limb + stu 7,-4(9) C store previous result during read latency + sreq 7,0,8 C compute result limb + bdn Loop C loop back until CTR is zero +Lend: stu 7,-4(9) C store 2:nd least significant limb +Lend2: sle 7,0,6 C compute least significant limb + st 7,-4(9) C store it + br +EPILOGUE(mpn_lshift) diff --git a/mpn/power/lshift.s b/mpn/power/lshift.s deleted file mode 100644 index fd2576476..000000000 --- a/mpn/power/lshift.s +++ /dev/null @@ -1,56 +0,0 @@ -# IBM POWER __gmpn_lshift -- - -# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. 
- -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s_ptr r4 -# size r5 -# cnt r6 - - .toc - .globl __gmpn_lshift - .globl .__gmpn_lshift - .csect __gmpn_lshift[DS] -__gmpn_lshift: - .long .__gmpn_lshift, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__gmpn_lshift: - sli 0,5,2 - cax 9,3,0 - cax 4,4,0 - sfi 8,6,32 - mtctr 5 # put limb count in CTR loop register - lu 0,-4(4) # read most significant limb - sre 3,0,8 # compute carry out limb, and init MQ register - bdz Lend2 # if just one limb, skip loop - lu 0,-4(4) # read 2:nd most significant limb - sreq 7,0,8 # compute most significant limb of result - bdz Lend # if just two limb, skip loop -Loop: lu 0,-4(4) # load next lower limb - stu 7,-4(9) # store previous result during read latency - sreq 7,0,8 # compute result limb - bdn Loop # loop back until CTR is zero -Lend: stu 7,-4(9) # store 2:nd least significant limb -Lend2: sle 7,0,6 # compute least significant limb - st 7,-4(9) # store it" \ - br diff --git a/mpn/power/mul_1.asm b/mpn/power/mul_1.asm new file mode 100644 index 000000000..bdf009906 --- /dev/null +++ b/mpn/power/mul_1.asm @@ -0,0 +1,104 @@ +dnl IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the +dnl result in a second limb 
vector. + +dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl The POWER architecture has no unsigned 32x32->64 bit multiplication +dnl instruction. To obtain that operation, we have to use the 32x32->64 +dnl signed multiplication instruction, and add the appropriate compensation to +dnl the high limb of the result. We add the multiplicand if the multiplier +dnl has its most significant bit set, and we add the multiplier if the +dnl multiplicand has its most significant bit set. We need to preserve the +dnl carry flag between each iteration, so we have to compute the compensation +dnl carefully (the natural, srai+and doesn't work). Since all POWER can +dnl branch in zero cycles, we use conditional branches for the additions.
+ +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_mul_1) + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + ai 0,0,0 C reset carry + cax 9,9,7 + blt Lneg +Lpos: bdz Lend +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 + bge Lp0 + cax 10,10,6 C adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + bge Lp1 + cax 9,9,6 C adjust high limb for negative limb from s1 +Lp1: bdn Lploop + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + cax 10,10,0 C adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,9 + bge Ln0 + cax 10,10,6 C adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + cax 9,9,0 C adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,10 + bge Ln1 + cax 9,9,6 C adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br +EPILOGUE(mpn_mul_1) diff --git a/mpn/power/mul_1.s b/mpn/power/mul_1.s deleted file mode 100644 index 61869437e..000000000 --- a/mpn/power/mul_1.s +++ /dev/null @@ -1,109 +0,0 @@ -# IBM POWER __gmpn_mul_1 -- Multiply a limb vector with a limb and store -# the result in a second limb vector. - -# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# size r5 -# s2_limb r6 - -# The POWER architecture has no unsigned 32x32->64 bit multiplication -# instruction. To obtain that operation, we have to use the 32x32->64 signed -# multiplication instruction, and add the appropriate compensation to the high -# limb of the result. We add the multiplicand if the multiplier has its most -# significant bit set, and we add the multiplier if the multiplicand has its -# most significant bit set. We need to preserve the carry flag between each -# iteration, so we have to compute the compensation carefully (the natural, -# srai+and doesn't work). Since the POWER architecture has a branch unit we -# can branch in zero cycles, so that's how we perform the additions. 
- - .toc - .globl __gmpn_mul_1 - .globl .__gmpn_mul_1 - .csect __gmpn_mul_1[DS] -__gmpn_mul_1: - .long .__gmpn_mul_1, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__gmpn_mul_1: - - cal 3,-4(3) - l 0,0(4) - cmpi 0,6,0 - mtctr 5 - mul 9,0,6 - srai 7,0,31 - and 7,7,6 - mfmq 8 - ai 0,0,0 # reset carry - cax 9,9,7 - blt Lneg -Lpos: bdz Lend -Lploop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 0 - ae 8,0,9 - bge Lp0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Lp0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 0 - ae 8,0,10 - bge Lp1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Lp1: bdn Lploop - b Lend - -Lneg: cax 9,9,0 - bdz Lend -Lnloop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - cax 10,10,0 # adjust high limb for negative s2_limb - mfmq 0 - ae 8,0,9 - bge Ln0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Ln0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - cax 9,9,0 # adjust high limb for negative s2_limb - mfmq 0 - ae 8,0,10 - bge Ln1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Ln1: bdn Lnloop - b Lend - -Lend0: cal 9,0(10) -Lend: st 8,4(3) - aze 3,9 - br diff --git a/mpn/power/rshift.asm b/mpn/power/rshift.asm new file mode 100644 index 000000000..2b8c07d0e --- /dev/null +++ b/mpn/power/rshift.asm @@ -0,0 +1,50 @@ +dnl IBM POWER mpn_rshift -- Shift a number right. + +dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s_ptr r4 +dnl size r5 +dnl cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_rshift) + sfi 8,6,32 + mtctr 5 C put limb count in CTR loop register + l 0,0(4) C read least significant limb + ai 9,3,-4 C adjust res_ptr since it's offset in the stu:s + sle 3,0,8 C compute carry limb, and init MQ register + bdz Lend2 C if just one limb, skip loop + lu 0,4(4) C read 2:nd least significant limb + sleq 7,0,8 C compute least significant limb of result + bdz Lend C if just two limb, skip loop +Loop: lu 0,4(4) C load next higher limb + stu 7,4(9) C store previous result during read latency + sleq 7,0,8 C compute result limb + bdn Loop C loop back until CTR is zero +Lend: stu 7,4(9) C store 2:nd most significant limb +Lend2: sre 7,0,6 C compute most significant limb + st 7,4(9) C store it + br +EPILOGUE(mpn_rshift) diff --git a/mpn/power/rshift.s b/mpn/power/rshift.s deleted file mode 100644 index e95cf7dca..000000000 --- a/mpn/power/rshift.s +++ /dev/null @@ -1,54 +0,0 @@ -# IBM POWER __gmpn_rshift -- - -# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s_ptr r4 -# size r5 -# cnt r6 - - .toc - .globl __gmpn_rshift - .globl .__gmpn_rshift - .csect __gmpn_rshift[DS] -__gmpn_rshift: - .long .__gmpn_rshift, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__gmpn_rshift: - sfi 8,6,32 - mtctr 5 # put limb count in CTR loop register - l 0,0(4) # read least significant limb - ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s - sle 3,0,8 # compute carry limb, and init MQ register - bdz Lend2 # if just one limb, skip loop - lu 0,4(4) # read 2:nd least significant limb - sleq 7,0,8 # compute least significant limb of result - bdz Lend # if just two limb, skip loop -Loop: lu 0,4(4) # load next higher limb - stu 7,4(9) # store previous result during read latency - sleq 7,0,8 # compute result limb - bdn Loop # loop back until CTR is zero -Lend: stu 7,4(9) # store 2:nd most significant limb -Lend2: sre 7,0,6 # compute most significant limb - st 7,4(9) # store it" \ - br diff --git a/mpn/power/sdiv.asm b/mpn/power/sdiv.asm new file mode 100644 index 000000000..75bcbb790 --- /dev/null +++ b/mpn/power/sdiv.asm @@ -0,0 +1,30 @@ +dnl Copyright 1999, 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. 
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_sdiv_qrnnd) + mtmq 5 + div 0,4,6 + mfmq 9 + st 9,0(3) + mr 3,0 + br +EPILOGUE(mpn_sdiv_qrnnd) diff --git a/mpn/power/sdiv.s b/mpn/power/sdiv.s deleted file mode 100644 index a6ca4246e..000000000 --- a/mpn/power/sdiv.s +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 1999 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. 
- - .toc - .globl __sdiv_qrnnd - .globl .__sdiv_qrnnd - .csect __sdiv_qrnnd[DS] -__sdiv_qrnnd: - .long .__sdiv_qrnnd, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__sdiv_qrnnd: - mtmq 5 - div 0,4,6 - mfmq 9 - st 9,0(3) - mr 3,0 - br diff --git a/mpn/power/sub_n.asm b/mpn/power/sub_n.asm new file mode 100644 index 000000000..e4e9892db --- /dev/null +++ b/mpn/power/sub_n.asm @@ -0,0 +1,77 @@ +dnl IBM POWER mpn_sub_n -- Subtract two limb vectors of equal, non-zero +dnl length. + +dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software +dnl Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl s2_ptr r5 +dnl size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_sub_n) + andil. 10,6,1 C odd or even number of limbs? + l 8,0(4) C load least significant s1 limb + l 0,0(5) C load least significant s2 limb + cal 3,-4(3) C offset res_ptr, it's updated before it's used + sri 10,6,1 C count for unrolled loop + sf 7,0,8 C subtract least significant limbs, set cy + mtctr 10 C copy count into CTR + beq 0,Leven C branch if even number of limbs (number of limbs >= 2) + +C We have an odd number of limbs.
Subtract the first limbs separately. + cmpi 1,10,0 C is count for unrolled loop zero? + bc 4,6,L1 C bne cr1,L1 (misassembled by gas) + st 7,4(3) + sfe 3,0,0 C load !cy into ... + sfi 3,3,0 C ... return value register + br C return + +C We subtracted the least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) C load s1 limb and update s1_ptr + lu 0,4(5) C load s2 limb and update s2_ptr + stu 7,4(3) + sfe 7,0,8 C subtract limbs, set cy +Leven: lu 9,4(4) C load s1 limb and update s1_ptr + lu 10,4(5) C load s2 limb and update s2_ptr + bdz Lend C If done, skip loop + +Loop: lu 8,4(4) C load s1 limb and update s1_ptr + lu 0,4(5) C load s2 limb and update s2_ptr + sfe 11,10,9 C subtract previous limbs with cy, set cy + stu 7,4(3) C + lu 9,4(4) C load s1 limb and update s1_ptr + lu 10,4(5) C load s2 limb and update s2_ptr + sfe 7,0,8 C subtract previous limbs with cy, set cy + stu 11,4(3) C + bdn Loop C decrement CTR and loop back + +Lend: sfe 11,10,9 C subtract limbs with cy, set cy + st 7,4(3) C + st 11,8(3) C + sfe 3,0,0 C load !cy into ... + sfi 3,3,0 C ... return value register + br +EPILOGUE(mpn_sub_n) diff --git a/mpn/power/sub_n.s b/mpn/power/sub_n.s deleted file mode 100644 index a8ecd204f..000000000 --- a/mpn/power/sub_n.s +++ /dev/null @@ -1,80 +0,0 @@ -# IBM POWER __gmpn_sub_n -- Subtract two limb vectors of equal, non-zero length. - -# Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation, -# Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# s2_ptr r5 -# size r6 - - .toc - .globl __gmpn_sub_n - .globl .__gmpn_sub_n - .csect __gmpn_sub_n[DS] -__gmpn_sub_n: - .long .__gmpn_sub_n, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__gmpn_sub_n: - andil. 10,6,1 # odd or even number of limbs? - l 8,0(4) # load least significant s1 limb - l 0,0(5) # load least significant s2 limb - cal 3,-4(3) # offset res_ptr, it's updated before it's used - sri 10,6,1 # count for unrolled loop - sf 7,0,8 # subtract least significant limbs, set cy - mtctr 10 # copy count into CTR - beq 0,Leven # branch if even # of limbs (# of limbs >= 2) - -# We have an odd # of limbs. Add the first limbs separately. - cmpi 1,10,0 # is count for unrolled loop zero? - bc 4,6,L1 # bne cr1,L1 (misassembled by gas) - st 7,4(3) - sfe 3,0,0 # load !cy into ... - sfi 3,3,0 # ... return value register - br # return - -# We added least significant limbs. Now reload the next limbs to enter loop. 
-L1: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - stu 7,4(3) - sfe 7,0,8 # subtract limbs, set cy -Leven: lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - bdz Lend # If done, skip loop - -Loop: lu 8,4(4) # load s1 limb and update s1_ptr - lu 0,4(5) # load s2 limb and update s2_ptr - sfe 11,10,9 # subtract previous limbs with cy, set cy - stu 7,4(3) # - lu 9,4(4) # load s1 limb and update s1_ptr - lu 10,4(5) # load s2 limb and update s2_ptr - sfe 7,0,8 # subtract previous limbs with cy, set cy - stu 11,4(3) # - bdn Loop # decrement CTR and loop back - -Lend: sfe 11,10,9 # subtract limbs with cy, set cy - st 7,4(3) # - st 11,8(3) # - sfe 3,0,0 # load !cy into ... - sfi 3,3,0 # ... return value register - br diff --git a/mpn/power/submul_1.asm b/mpn/power/submul_1.asm new file mode 100644 index 000000000..70f685ac2 --- /dev/null +++ b/mpn/power/submul_1.asm @@ -0,0 +1,122 @@ +dnl IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract +dnl the result from a second limb vector. + +dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl The POWER architecture has no unsigned 32x32->64 bit multiplication +dnl instruction. To obtain that operation, we have to use the 32x32->64 +dnl signed multiplication instruction, and add the appropriate compensation to +dnl the high limb of the result. We add the multiplicand if the multiplier +dnl has its most significant bit set, and we add the multiplier if the +dnl multiplicand has its most significant bit set. We need to preserve the +dnl carry flag between each iteration, so we have to compute the compensation +dnl carefully (the natural, srai+and doesn't work). Since all POWER can +dnl branch in zero cycles, we use conditional branches to perform the additions. + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_submul_1) + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 11 + cax 9,9,7 + l 7,4(3) + sf 8,11,7 C subtract from res_limb + a 11,8,11 C invert cy (r11 is junk) + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 11,0,9 C low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 C propagate cy to new cy_limb + sf 8,11,7 C subtract from res_limb + a 11,8,11 C invert cy (r11 is junk) + bge Lp0 + cax 10,10,6 C adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 11,0,10 + l 7,4(3) + aze 9,9 + sf 8,11,7 + a 11,8,11 C invert cy (r11 is junk) + bge Lp1 + cax 9,9,6 C adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 11,7,9 + l 7,4(3) + ae 10,10,0 C propagate cy to new cy_limb + sf 8,11,7 C subtract from res_limb + a 11,8,11 C invert cy (r11 is junk) + bge Ln0 + cax 10,10,6 C adjust high limb for 
negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 11,7,10 + l 7,4(3) + ae 9,9,0 C propagate cy to new cy_limb + sf 8,11,7 C subtract from res_limb + a 11,8,11 C invert cy (r11 is junk) + bge Ln1 + cax 9,9,6 C adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br +EPILOGUE(mpn_submul_1) diff --git a/mpn/power/submul_1.s b/mpn/power/submul_1.s deleted file mode 100644 index 972bf876c..000000000 --- a/mpn/power/submul_1.s +++ /dev/null @@ -1,127 +0,0 @@ -# IBM POWER __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract -# the result from a second limb vector. - -# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# size r5 -# s2_limb r6 - -# The POWER architecture has no unsigned 32x32->64 bit multiplication -# instruction. To obtain that operation, we have to use the 32x32->64 signed -# multiplication instruction, and add the appropriate compensation to the high -# limb of the result. 
We add the multiplicand if the multiplier has its most -# significant bit set, and we add the multiplier if the multiplicand has its -# most significant bit set. We need to preserve the carry flag between each -# iteration, so we have to compute the compensation carefully (the natural, -# srai+and doesn't work). Since the POWER architecture has a branch unit we -# can branch in zero cycles, so that's how we perform the additions. - - .toc - .globl __gmpn_submul_1 - .globl .__gmpn_submul_1 - .csect __gmpn_submul_1[DS] -__gmpn_submul_1: - .long .__gmpn_submul_1, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__gmpn_submul_1: - - cal 3,-4(3) - l 0,0(4) - cmpi 0,6,0 - mtctr 5 - mul 9,0,6 - srai 7,0,31 - and 7,7,6 - mfmq 11 - cax 9,9,7 - l 7,4(3) - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - blt Lneg -Lpos: bdz Lend - -Lploop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 0 - ae 11,0,9 # low limb + old_cy_limb + old cy - l 7,4(3) - aze 10,10 # propagate cy to new cy_limb - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - bge Lp0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Lp0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 0 - ae 11,0,10 - l 7,4(3) - aze 9,9 - sf 8,11,7 - a 11,8,11 # invert cy (r11 is junk) - bge Lp1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Lp1: bdn Lploop - - b Lend - -Lneg: cax 9,9,0 - bdz Lend -Lnloop: lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 10,0,6 - mfmq 7 - ae 11,7,9 - l 7,4(3) - ae 10,10,0 # propagate cy to new cy_limb - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - bge Ln0 - cax 10,10,6 # adjust high limb for negative limb from s1 -Ln0: bdz Lend0 - lu 0,4(4) - stu 8,4(3) - cmpi 0,0,0 - mul 9,0,6 - mfmq 7 - ae 11,7,10 - l 7,4(3) - ae 9,9,0 # propagate cy to new cy_limb - sf 8,11,7 # add res_limb - a 11,8,11 # invert cy (r11 is junk) - bge Ln1 - cax 9,9,6 # adjust high limb for negative limb from s1 -Ln1: bdn Lnloop - b Lend - 
-Lend0: cal 9,0(10) -Lend: st 8,4(3) - aze 3,9 - br diff --git a/mpn/power/umul.asm b/mpn/power/umul.asm new file mode 100644 index 000000000..82eb6ee8e --- /dev/null +++ b/mpn/power/umul.asm @@ -0,0 +1,34 @@ +dnl Copyright 1999, 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_umul_ppmm) + mul 9,4,5 C signed multiply of r4 and r5: high word in r9, low word in MQ + srai 0,4,31 C mask from the sign bit of r4 + and 0,0,5 C r0 = r5 if r4 has its most significant bit set, else 0 + srai 5,5,31 C mask from the sign bit of r5 + and 5,5,4 C r5 = r4 if r5 had its most significant bit set, else 0 + cax 0,0,5 C r0 = total compensation for the high word + mfmq 11 C r11 = low word of the product + st 11,0(3) C store the low word through the pointer in r3 + cax 3,9,0 C return value: high word plus compensation + br C return +EPILOGUE(mpn_umul_ppmm) diff --git a/mpn/power/umul.s b/mpn/power/umul.s deleted file mode 100644 index f2f85503c..000000000 --- a/mpn/power/umul.s +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 1999 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at your -# option) any later version. 
- -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. - - .toc - .globl __umul_ppmm - .globl .__umul_ppmm - .csect __umul_ppmm[DS] -__umul_ppmm: - .long .__umul_ppmm, TOC[tc0], 0 - .csect .text[PR] - .align 2 -.__umul_ppmm: - mul 9,4,5 - srai 0,4,31 - and 0,0,5 - srai 5,5,31 - and 5,5,4 - cax 0,0,5 - mfmq 11 - st 11,0(3) - cax 3,9,0 - br |