summaryrefslogtreecommitdiff
path: root/mpn/power
diff options
context:
space:
mode:
authortege <tege@gmplib.org>2001-02-10 01:46:50 +0100
committertege <tege@gmplib.org>2001-02-10 01:46:50 +0100
commitf1f0a217a06cbdd9b541c8d84297aaa840a6ad93 (patch)
tree1833943d05f7316c17c6acf95019b43d0808fb41 /mpn/power
parentab64a1b0078eba23d2d9d44026dd05540e17db71 (diff)
downloadgmp-f1f0a217a06cbdd9b541c8d84297aaa840a6ad93.tar.gz
Convert files to `.asm'.
Prefix umul_ppmm and sdiv_qrnnd. Update some comments.
Diffstat (limited to 'mpn/power')
-rw-r--r--mpn/power/add_n.asm75
-rw-r--r--mpn/power/add_n.s79
-rw-r--r--mpn/power/addmul_1.asm117
-rw-r--r--mpn/power/addmul_1.s122
-rw-r--r--mpn/power/lshift.asm52
-rw-r--r--mpn/power/lshift.s56
-rw-r--r--mpn/power/mul_1.asm104
-rw-r--r--mpn/power/mul_1.s109
-rw-r--r--mpn/power/rshift.asm50
-rw-r--r--mpn/power/rshift.s54
-rw-r--r--mpn/power/sdiv.asm30
-rw-r--r--mpn/power/sdiv.s34
-rw-r--r--mpn/power/sub_n.asm77
-rw-r--r--mpn/power/sub_n.s80
-rw-r--r--mpn/power/submul_1.asm122
-rw-r--r--mpn/power/submul_1.s127
-rw-r--r--mpn/power/umul.asm34
-rw-r--r--mpn/power/umul.s38
18 files changed, 661 insertions, 699 deletions
diff --git a/mpn/power/add_n.asm b/mpn/power/add_n.asm
new file mode 100644
index 000000000..ef0d53080
--- /dev/null
+++ b/mpn/power/add_n.asm
@@ -0,0 +1,75 @@
+dnl IBM POWER mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ andil. 10,6,1 C odd or even number of limbs?
+ l 8,0(4) C load least significant s1 limb
+ l 0,0(5) C load least significant s2 limb
+ cal 3,-4(3) C offset res_ptr, it's updated before it's used
+ sri 10,6,1 C count for unrolled loop
+ a 7,0,8 C add least significant limbs, set cy
+ mtctr 10 C copy count into CTR
+ beq 0,Leven C branch if even number of limbs (number of limbs >= 2)
+
+C We have an odd number of limbs. Add the first limbs separately.
+ cmpi 1,10,0 C is count for unrolled loop zero?
+ bc 4,6,L1 C bne cr1,L1 (misassembled by gas)
+ st 7,4(3) C just one limb: store the only result limb
+ aze 3,10 C use the fact that r10 is zero: return value = cy
+ br C return
+
+C We added least significant limbs. Now reload the next limbs to enter loop.
+L1: lu 8,4(4) C load s1 limb and update s1_ptr
+ lu 0,4(5) C load s2 limb and update s2_ptr
+ stu 7,4(3) C store least significant result limb, update res_ptr
+ ae 7,0,8 C add limbs, set cy
+Leven: lu 9,4(4) C load s1 limb and update s1_ptr
+ lu 10,4(5) C load s2 limb and update s2_ptr
+ bdz Lend C If done, skip loop
+
+Loop: lu 8,4(4) C load s1 limb and update s1_ptr
+ lu 0,4(5) C load s2 limb and update s2_ptr
+ ae 11,10,9 C add previous limbs with cy, set cy
+ stu 7,4(3) C store result limb and update res_ptr
+ lu 9,4(4) C load s1 limb and update s1_ptr
+ lu 10,4(5) C load s2 limb and update s2_ptr
+ ae 7,0,8 C add previous limbs with cy, set cy
+ stu 11,4(3) C store result limb and update res_ptr
+ bdn Loop C decrement CTR and loop back
+
+Lend: ae 11,10,9 C add limbs with cy, set cy
+ st 7,4(3) C store 2:nd most significant result limb
+ st 11,8(3) C store most significant result limb
+ lil 3,0 C load cy into ...
+ aze 3,3 C ... return value register
+ br
+EPILOGUE(mpn_add_n)
diff --git a/mpn/power/add_n.s b/mpn/power/add_n.s
deleted file mode 100644
index 68d10e4d0..000000000
--- a/mpn/power/add_n.s
+++ /dev/null
@@ -1,79 +0,0 @@
-# IBM POWER __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
-
-# Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
-# Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# s2_ptr r5
-# size r6
-
- .toc
- .globl __gmpn_add_n
- .globl .__gmpn_add_n
- .csect __gmpn_add_n[DS]
-__gmpn_add_n:
- .long .__gmpn_add_n, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__gmpn_add_n:
- andil. 10,6,1 # odd or even number of limbs?
- l 8,0(4) # load least significant s1 limb
- l 0,0(5) # load least significant s2 limb
- cal 3,-4(3) # offset res_ptr, it's updated before it's used
- sri 10,6,1 # count for unrolled loop
- a 7,0,8 # add least significant limbs, set cy
- mtctr 10 # copy count into CTR
- beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs. Add the first limbs separately.
- cmpi 1,10,0 # is count for unrolled loop zero?
- bc 4,6,L1 # bne cr1,L1 (misassembled by gas)
- st 7,4(3)
- aze 3,10 # use the fact that r10 is zero...
- br # return
-
-# We added least significant limbs. Now reload the next limbs to enter loop.
-L1: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- stu 7,4(3)
- ae 7,0,8 # add limbs, set cy
-Leven: lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- bdz Lend # If done, skip loop
-
-Loop: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- ae 11,9,10 # add previous limbs with cy, set cy
- stu 7,4(3) #
- lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- ae 7,0,8 # add previous limbs with cy, set cy
- stu 11,4(3) #
- bdn Loop # decrement CTR and loop back
-
-Lend: ae 11,9,10 # add limbs with cy, set cy
- st 7,4(3) #
- st 11,8(3) #
- lil 3,0 # load cy into ...
- aze 3,3 # ... return value register
- br
diff --git a/mpn/power/addmul_1.asm b/mpn/power/addmul_1.asm
new file mode 100644
index 000000000..1e1e358c3
--- /dev/null
+++ b/mpn/power/addmul_1.asm
@@ -0,0 +1,117 @@
+dnl IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl instruction. To obtain that operation, we have to use the 32x32->64
+dnl signed multiplication instruction, and add the appropriate compensation to
+dnl the high limb of the result. We add the multiplicand if the multiplier
+dnl has its most significant bit set, and we add the multiplier if the
+dnl multiplicand has its most significant bit set. We need to preserve the
+dnl carry flag between each iteration, so we have to compute the compensation
+dnl carefully (the natural, srai+and doesn't work). Since all POWER CPUs can
+dnl branch in zero cycles, we use conditional branches to perform the additions.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ cal 3,-4(3) C offset res_ptr, it's updated before it's used
+ l 0,0(4) C load least significant s1 limb
+ cmpi 0,6,0 C is s2_limb negative? (tested by blt below)
+ mtctr 5 C copy size into CTR
+ mul 9,0,6 C signed multiply: high limb in r9, low limb in MQ
+ srai 7,0,31 C r7 = sign mask of s1 limb
+ and 7,7,6 C r7 = s2_limb if s1 limb is negative, else 0
+ mfmq 8 C fetch low limb of product
+ cax 9,9,7 C adjust high limb for negative limb from s1
+ l 7,4(3) C load res_limb
+ a 8,8,7 C add res_limb
+ blt Lneg C s2_limb is negative, use the compensating loop
+Lpos: bdz Lend C if just one limb, skip loop
+
+Lploop: lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 10,0,6 C signed multiply: high limb in r10, low limb in MQ
+ mfmq 0 C fetch low limb of product
+ ae 8,0,9 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ aze 10,10 C propagate cy to new cy_limb
+ a 8,8,7 C add res_limb
+ bge Lp0 C no adjustment if s1 limb is non-negative
+ cax 10,10,6 C adjust high limb for negative limb from s1
+Lp0: bdz Lend0 C done, cy_limb is in r10
+ lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 9,0,6 C signed multiply: high limb in r9, low limb in MQ
+ mfmq 0 C fetch low limb of product
+ ae 8,0,10 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ aze 9,9 C propagate cy to new cy_limb
+ a 8,8,7 C add res_limb
+ bge Lp1 C no adjustment if s1 limb is non-negative
+ cax 9,9,6 C adjust high limb for negative limb from s1
+Lp1: bdn Lploop C decrement CTR and loop back
+
+ b Lend C done, cy_limb is in r9
+
+Lneg: cax 9,9,0 C adjust high limb for negative s2_limb
+ bdz Lend C if just one limb, skip loop
+Lnloop: lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 10,0,6 C signed multiply: high limb in r10, low limb in MQ
+ mfmq 7 C fetch low limb of product, keep s1 limb in r0
+ ae 8,7,9 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ ae 10,10,0 C propagate cy to new cy_limb, add s1 limb for negative s2_limb
+ a 8,8,7 C add res_limb
+ bge Ln0 C no adjustment if s1 limb is non-negative
+ cax 10,10,6 C adjust high limb for negative limb from s1
+Ln0: bdz Lend0 C done, cy_limb is in r10
+ lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 9,0,6 C signed multiply: high limb in r9, low limb in MQ
+ mfmq 7 C fetch low limb of product, keep s1 limb in r0
+ ae 8,7,10 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ ae 9,9,0 C propagate cy to new cy_limb, add s1 limb for negative s2_limb
+ a 8,8,7 C add res_limb
+ bge Ln1 C no adjustment if s1 limb is non-negative
+ cax 9,9,6 C adjust high limb for negative limb from s1
+Ln1: bdn Lnloop C decrement CTR and loop back
+ b Lend C done, cy_limb is in r9
+
+Lend0: cal 9,0(10) C move cy_limb from r10 to r9
+Lend: st 8,4(3) C store most significant result limb
+ aze 3,9 C return cy_limb + cy
+ br
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/power/addmul_1.s b/mpn/power/addmul_1.s
deleted file mode 100644
index aefbedc24..000000000
--- a/mpn/power/addmul_1.s
+++ /dev/null
@@ -1,122 +0,0 @@
-# IBM POWER __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
-# the result to a second limb vector.
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# size r5
-# s2_limb r6
-
-# The POWER architecture has no unsigned 32x32->64 bit multiplication
-# instruction. To obtain that operation, we have to use the 32x32->64 signed
-# multiplication instruction, and add the appropriate compensation to the high
-# limb of the result. We add the multiplicand if the multiplier has its most
-# significant bit set, and we add the multiplier if the multiplicand has its
-# most significant bit set. We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work). Since the POWER architecture has a branch unit we
-# can branch in zero cycles, so that's how we perform the additions.
-
- .toc
- .globl __gmpn_addmul_1
- .globl .__gmpn_addmul_1
- .csect __gmpn_addmul_1[DS]
-__gmpn_addmul_1:
- .long .__gmpn_addmul_1, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__gmpn_addmul_1:
-
- cal 3,-4(3)
- l 0,0(4)
- cmpi 0,6,0
- mtctr 5
- mul 9,0,6
- srai 7,0,31
- and 7,7,6
- mfmq 8
- cax 9,9,7
- l 7,4(3)
- a 8,8,7 # add res_limb
- blt Lneg
-Lpos: bdz Lend
-
-Lploop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 0
- ae 8,0,9 # low limb + old_cy_limb + old cy
- l 7,4(3)
- aze 10,10 # propagate cy to new cy_limb
- a 8,8,7 # add res_limb
- bge Lp0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Lp0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 0
- ae 8,0,10
- l 7,4(3)
- aze 9,9
- a 8,8,7
- bge Lp1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Lp1: bdn Lploop
-
- b Lend
-
-Lneg: cax 9,9,0
- bdz Lend
-Lnloop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 7
- ae 8,7,9
- l 7,4(3)
- ae 10,10,0 # propagate cy to new cy_limb
- a 8,8,7 # add res_limb
- bge Ln0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Ln0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 7
- ae 8,7,10
- l 7,4(3)
- ae 9,9,0 # propagate cy to new cy_limb
- a 8,8,7 # add res_limb
- bge Ln1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Ln1: bdn Lnloop
- b Lend
-
-Lend0: cal 9,0(10)
-Lend: st 8,4(3)
- aze 3,9
- br
diff --git a/mpn/power/lshift.asm b/mpn/power/lshift.asm
new file mode 100644
index 000000000..c5358ff0f
--- /dev/null
+++ b/mpn/power/lshift.asm
@@ -0,0 +1,52 @@
+dnl IBM POWER mpn_lshift -- Shift a number left.
+
+dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ sli 0,5,2 C r0 = size in bytes (4 bytes per limb)
+ cax 9,3,0 C r9 = res_ptr + 4*size, just past the result
+ cax 4,4,0 C r4 = s_ptr + 4*size, just past the source
+ sfi 8,6,32 C r8 = 32 - cnt
+ mtctr 5 C put limb count in CTR loop register
+ lu 0,-4(4) C read most significant limb
+ sre 3,0,8 C compute carry out limb, and init MQ register
+ bdz Lend2 C if just one limb, skip loop
+ lu 0,-4(4) C read 2:nd most significant limb
+ sreq 7,0,8 C compute most significant limb of result
+ bdz Lend C if just two limb, skip loop
+Loop: lu 0,-4(4) C load next lower limb
+ stu 7,-4(9) C store previous result during read latency
+ sreq 7,0,8 C compute result limb
+ bdn Loop C loop back until CTR is zero
+Lend: stu 7,-4(9) C store 2:nd least significant limb
+Lend2: sle 7,0,6 C compute least significant limb
+ st 7,-4(9) C store it
+ br C return, carry out limb is in r3
+EPILOGUE(mpn_lshift)
diff --git a/mpn/power/lshift.s b/mpn/power/lshift.s
deleted file mode 100644
index fd2576476..000000000
--- a/mpn/power/lshift.s
+++ /dev/null
@@ -1,56 +0,0 @@
-# IBM POWER __gmpn_lshift --
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s_ptr r4
-# size r5
-# cnt r6
-
- .toc
- .globl __gmpn_lshift
- .globl .__gmpn_lshift
- .csect __gmpn_lshift[DS]
-__gmpn_lshift:
- .long .__gmpn_lshift, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__gmpn_lshift:
- sli 0,5,2
- cax 9,3,0
- cax 4,4,0
- sfi 8,6,32
- mtctr 5 # put limb count in CTR loop register
- lu 0,-4(4) # read most significant limb
- sre 3,0,8 # compute carry out limb, and init MQ register
- bdz Lend2 # if just one limb, skip loop
- lu 0,-4(4) # read 2:nd most significant limb
- sreq 7,0,8 # compute most significant limb of result
- bdz Lend # if just two limb, skip loop
-Loop: lu 0,-4(4) # load next lower limb
- stu 7,-4(9) # store previous result during read latency
- sreq 7,0,8 # compute result limb
- bdn Loop # loop back until CTR is zero
-Lend: stu 7,-4(9) # store 2:nd least significant limb
-Lend2: sle 7,0,6 # compute least significant limb
- st 7,-4(9) # store it" \
- br
diff --git a/mpn/power/mul_1.asm b/mpn/power/mul_1.asm
new file mode 100644
index 000000000..bdf009906
--- /dev/null
+++ b/mpn/power/mul_1.asm
@@ -0,0 +1,104 @@
+dnl IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl result in a second limb vector.
+
+dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl instruction. To obtain that operation, we have to use the 32x32->64
+dnl signed multiplication instruction, and add the appropriate compensation to
+dnl the high limb of the result. We add the multiplicand if the multiplier
+dnl has its most significant bit set, and we add the multiplier if the
+dnl multiplicand has its most significant bit set. We need to preserve the
+dnl carry flag between each iteration, so we have to compute the compensation
+dnl carefully (the natural, srai+and doesn't work). Since all POWER CPUs can
+dnl branch in zero cycles, we use conditional branches to perform the additions.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ cal 3,-4(3) C offset res_ptr, it's updated before it's used
+ l 0,0(4) C load least significant s1 limb
+ cmpi 0,6,0 C is s2_limb negative? (tested by blt below)
+ mtctr 5 C copy size into CTR
+ mul 9,0,6 C signed multiply: high limb in r9, low limb in MQ
+ srai 7,0,31 C r7 = sign mask of s1 limb
+ and 7,7,6 C r7 = s2_limb if s1 limb is negative, else 0
+ mfmq 8 C fetch low limb of product
+ ai 0,0,0 C reset carry
+ cax 9,9,7 C adjust high limb for negative limb from s1
+ blt Lneg C s2_limb is negative, use the compensating loop
+Lpos: bdz Lend C if just one limb, skip loop
+Lploop: lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 10,0,6 C signed multiply: high limb in r10, low limb in MQ
+ mfmq 0 C fetch low limb of product
+ ae 8,0,9 C low limb + old_cy_limb + old cy
+ bge Lp0 C no adjustment if s1 limb is non-negative
+ cax 10,10,6 C adjust high limb for negative limb from s1
+Lp0: bdz Lend0 C done, cy_limb is in r10
+ lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 9,0,6 C signed multiply: high limb in r9, low limb in MQ
+ mfmq 0 C fetch low limb of product
+ ae 8,0,10 C low limb + old_cy_limb + old cy
+ bge Lp1 C no adjustment if s1 limb is non-negative
+ cax 9,9,6 C adjust high limb for negative limb from s1
+Lp1: bdn Lploop C decrement CTR and loop back
+ b Lend C done, cy_limb is in r9
+
+Lneg: cax 9,9,0 C adjust high limb for negative s2_limb
+ bdz Lend C if just one limb, skip loop
+Lnloop: lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 10,0,6 C signed multiply: high limb in r10, low limb in MQ
+ cax 10,10,0 C adjust high limb for negative s2_limb
+ mfmq 0 C fetch low limb of product
+ ae 8,0,9 C low limb + old_cy_limb + old cy
+ bge Ln0 C no adjustment if s1 limb is non-negative
+ cax 10,10,6 C adjust high limb for negative limb from s1
+Ln0: bdz Lend0 C done, cy_limb is in r10
+ lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C is s1 limb negative? (tested by bge below)
+ mul 9,0,6 C signed multiply: high limb in r9, low limb in MQ
+ cax 9,9,0 C adjust high limb for negative s2_limb
+ mfmq 0 C fetch low limb of product
+ ae 8,0,10 C low limb + old_cy_limb + old cy
+ bge Ln1 C no adjustment if s1 limb is non-negative
+ cax 9,9,6 C adjust high limb for negative limb from s1
+Ln1: bdn Lnloop C decrement CTR and loop back
+ b Lend C done, cy_limb is in r9
+
+Lend0: cal 9,0(10) C move cy_limb from r10 to r9
+Lend: st 8,4(3) C store most significant result limb
+ aze 3,9 C return cy_limb + cy
+ br
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/power/mul_1.s b/mpn/power/mul_1.s
deleted file mode 100644
index 61869437e..000000000
--- a/mpn/power/mul_1.s
+++ /dev/null
@@ -1,109 +0,0 @@
-# IBM POWER __gmpn_mul_1 -- Multiply a limb vector with a limb and store
-# the result in a second limb vector.
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# size r5
-# s2_limb r6
-
-# The POWER architecture has no unsigned 32x32->64 bit multiplication
-# instruction. To obtain that operation, we have to use the 32x32->64 signed
-# multiplication instruction, and add the appropriate compensation to the high
-# limb of the result. We add the multiplicand if the multiplier has its most
-# significant bit set, and we add the multiplier if the multiplicand has its
-# most significant bit set. We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work). Since the POWER architecture has a branch unit we
-# can branch in zero cycles, so that's how we perform the additions.
-
- .toc
- .globl __gmpn_mul_1
- .globl .__gmpn_mul_1
- .csect __gmpn_mul_1[DS]
-__gmpn_mul_1:
- .long .__gmpn_mul_1, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__gmpn_mul_1:
-
- cal 3,-4(3)
- l 0,0(4)
- cmpi 0,6,0
- mtctr 5
- mul 9,0,6
- srai 7,0,31
- and 7,7,6
- mfmq 8
- ai 0,0,0 # reset carry
- cax 9,9,7
- blt Lneg
-Lpos: bdz Lend
-Lploop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 0
- ae 8,0,9
- bge Lp0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Lp0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 0
- ae 8,0,10
- bge Lp1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Lp1: bdn Lploop
- b Lend
-
-Lneg: cax 9,9,0
- bdz Lend
-Lnloop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- cax 10,10,0 # adjust high limb for negative s2_limb
- mfmq 0
- ae 8,0,9
- bge Ln0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Ln0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- cax 9,9,0 # adjust high limb for negative s2_limb
- mfmq 0
- ae 8,0,10
- bge Ln1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Ln1: bdn Lnloop
- b Lend
-
-Lend0: cal 9,0(10)
-Lend: st 8,4(3)
- aze 3,9
- br
diff --git a/mpn/power/rshift.asm b/mpn/power/rshift.asm
new file mode 100644
index 000000000..2b8c07d0e
--- /dev/null
+++ b/mpn/power/rshift.asm
@@ -0,0 +1,50 @@
+dnl IBM POWER mpn_rshift -- Shift a number right.
+
+dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ sfi 8,6,32 C r8 = 32 - cnt
+ mtctr 5 C put limb count in CTR loop register
+ l 0,0(4) C read least significant limb
+ ai 9,3,-4 C adjust res_ptr since it's offset in the stu:s
+ sle 3,0,8 C compute carry limb, and init MQ register
+ bdz Lend2 C if just one limb, skip loop
+ lu 0,4(4) C read 2:nd least significant limb
+ sleq 7,0,8 C compute least significant limb of result
+ bdz Lend C if just two limb, skip loop
+Loop: lu 0,4(4) C load next higher limb
+ stu 7,4(9) C store previous result during read latency
+ sleq 7,0,8 C compute result limb
+ bdn Loop C loop back until CTR is zero
+Lend: stu 7,4(9) C store 2:nd most significant limb
+Lend2: sre 7,0,6 C compute most significant limb
+ st 7,4(9) C store it
+ br C return, carry limb is in r3
+EPILOGUE(mpn_rshift)
diff --git a/mpn/power/rshift.s b/mpn/power/rshift.s
deleted file mode 100644
index e95cf7dca..000000000
--- a/mpn/power/rshift.s
+++ /dev/null
@@ -1,54 +0,0 @@
-# IBM POWER __gmpn_rshift --
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s_ptr r4
-# size r5
-# cnt r6
-
- .toc
- .globl __gmpn_rshift
- .globl .__gmpn_rshift
- .csect __gmpn_rshift[DS]
-__gmpn_rshift:
- .long .__gmpn_rshift, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__gmpn_rshift:
- sfi 8,6,32
- mtctr 5 # put limb count in CTR loop register
- l 0,0(4) # read least significant limb
- ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
- sle 3,0,8 # compute carry limb, and init MQ register
- bdz Lend2 # if just one limb, skip loop
- lu 0,4(4) # read 2:nd least significant limb
- sleq 7,0,8 # compute least significant limb of result
- bdz Lend # if just two limb, skip loop
-Loop: lu 0,4(4) # load next higher limb
- stu 7,4(9) # store previous result during read latency
- sleq 7,0,8 # compute result limb
- bdn Loop # loop back until CTR is zero
-Lend: stu 7,4(9) # store 2:nd most significant limb
-Lend2: sre 7,0,6 # compute most significant limb
- st 7,4(9) # store it" \
- br
diff --git a/mpn/power/sdiv.asm b/mpn/power/sdiv.asm
new file mode 100644
index 000000000..75bcbb790
--- /dev/null
+++ b/mpn/power/sdiv.asm
@@ -0,0 +1,30 @@
+dnl Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sdiv_qrnnd)
+ mtmq 5 C MQ = r5, low word of the dividend
+ div 0,4,6 C r0 = (r4:MQ) / r6, remainder is left in MQ
+ mfmq 9 C fetch remainder
+ st 9,0(3) C store remainder through the pointer in r3
+ mr 3,0 C return quotient
+ br
+EPILOGUE(mpn_sdiv_qrnnd)
diff --git a/mpn/power/sdiv.s b/mpn/power/sdiv.s
deleted file mode 100644
index a6ca4246e..000000000
--- a/mpn/power/sdiv.s
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 1999 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
- .toc
- .globl __sdiv_qrnnd
- .globl .__sdiv_qrnnd
- .csect __sdiv_qrnnd[DS]
-__sdiv_qrnnd:
- .long .__sdiv_qrnnd, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__sdiv_qrnnd:
- mtmq 5
- div 0,4,6
- mfmq 9
- st 9,0(3)
- mr 3,0
- br
diff --git a/mpn/power/sub_n.asm b/mpn/power/sub_n.asm
new file mode 100644
index 000000000..e4e9892db
--- /dev/null
+++ b/mpn/power/sub_n.asm
@@ -0,0 +1,77 @@
+dnl IBM POWER mpn_sub_n -- Subtract two limb vectors of equal, non-zero
+dnl length.
+
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ andil. 10,6,1 C odd or even number of limbs?
+ l 8,0(4) C load least significant s1 limb
+ l 0,0(5) C load least significant s2 limb
+ cal 3,-4(3) C offset res_ptr, it's updated before it's used
+ sri 10,6,1 C count for unrolled loop
+ sf 7,0,8 C subtract least significant limbs, set cy
+ mtctr 10 C copy count into CTR
+ beq 0,Leven C branch if even number of limbs (number of limbs >= 2)
+
+C We have an odd number of limbs. Subtract the first limbs separately.
+ cmpi 1,10,0 C is count for unrolled loop zero?
+ bc 4,6,L1 C bne cr1,L1 (misassembled by gas)
+ st 7,4(3) C just one limb: store the only result limb
+ sfe 3,0,0 C load !cy into ...
+ sfi 3,3,0 C ... return value register
+ br C return
+
+C We subtracted least significant limbs. Now reload the next limbs to enter loop.
+L1: lu 8,4(4) C load s1 limb and update s1_ptr
+ lu 0,4(5) C load s2 limb and update s2_ptr
+ stu 7,4(3) C store least significant result limb, update res_ptr
+ sfe 7,0,8 C subtract limbs, set cy
+Leven: lu 9,4(4) C load s1 limb and update s1_ptr
+ lu 10,4(5) C load s2 limb and update s2_ptr
+ bdz Lend C If done, skip loop
+
+Loop: lu 8,4(4) C load s1 limb and update s1_ptr
+ lu 0,4(5) C load s2 limb and update s2_ptr
+ sfe 11,10,9 C subtract previous limbs with cy, set cy
+ stu 7,4(3) C store result limb and update res_ptr
+ lu 9,4(4) C load s1 limb and update s1_ptr
+ lu 10,4(5) C load s2 limb and update s2_ptr
+ sfe 7,0,8 C subtract previous limbs with cy, set cy
+ stu 11,4(3) C store result limb and update res_ptr
+ bdn Loop C decrement CTR and loop back
+
+Lend: sfe 11,10,9 C subtract limbs with cy, set cy
+ st 7,4(3) C store 2:nd most significant result limb
+ st 11,8(3) C store most significant result limb
+ sfe 3,0,0 C load !cy into ...
+ sfi 3,3,0 C ... return value register
+ br
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/power/sub_n.s b/mpn/power/sub_n.s
deleted file mode 100644
index a8ecd204f..000000000
--- a/mpn/power/sub_n.s
+++ /dev/null
@@ -1,80 +0,0 @@
-# IBM POWER __gmpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
-
-# Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
-# Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# s2_ptr r5
-# size r6
-
- .toc
- .globl __gmpn_sub_n
- .globl .__gmpn_sub_n
- .csect __gmpn_sub_n[DS]
-__gmpn_sub_n:
- .long .__gmpn_sub_n, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__gmpn_sub_n:
- andil. 10,6,1 # odd or even number of limbs?
- l 8,0(4) # load least significant s1 limb
- l 0,0(5) # load least significant s2 limb
- cal 3,-4(3) # offset res_ptr, it's updated before it's used
- sri 10,6,1 # count for unrolled loop
- sf 7,0,8 # subtract least significant limbs, set cy
- mtctr 10 # copy count into CTR
- beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs. Add the first limbs separately.
- cmpi 1,10,0 # is count for unrolled loop zero?
- bc 4,6,L1 # bne cr1,L1 (misassembled by gas)
- st 7,4(3)
- sfe 3,0,0 # load !cy into ...
- sfi 3,3,0 # ... return value register
- br # return
-
-# We added least significant limbs. Now reload the next limbs to enter loop.
-L1: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- stu 7,4(3)
- sfe 7,0,8 # subtract limbs, set cy
-Leven: lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- bdz Lend # If done, skip loop
-
-Loop: lu 8,4(4) # load s1 limb and update s1_ptr
- lu 0,4(5) # load s2 limb and update s2_ptr
- sfe 11,10,9 # subtract previous limbs with cy, set cy
- stu 7,4(3) #
- lu 9,4(4) # load s1 limb and update s1_ptr
- lu 10,4(5) # load s2 limb and update s2_ptr
- sfe 7,0,8 # subtract previous limbs with cy, set cy
- stu 11,4(3) #
- bdn Loop # decrement CTR and loop back
-
-Lend: sfe 11,10,9 # subtract limbs with cy, set cy
- st 7,4(3) #
- st 11,8(3) #
- sfe 3,0,0 # load !cy into ...
- sfi 3,3,0 # ... return value register
- br
diff --git a/mpn/power/submul_1.asm b/mpn/power/submul_1.asm
new file mode 100644
index 000000000..70f685ac2
--- /dev/null
+++ b/mpn/power/submul_1.asm
@@ -0,0 +1,122 @@
+dnl IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl instruction. To obtain that operation, we have to use the 32x32->64
+dnl signed multiplication instruction, and add the appropriate compensation to
+dnl the high limb of the result. We add the multiplicand if the multiplier
+dnl has its most significant bit set, and we add the multiplier if the
+dnl multiplicand has its most significant bit set. We need to preserve the
+dnl carry flag between each iteration, so we have to compute the compensation
+dnl carefully (the natural srai+and doesn't work). Since all POWER CPUs can
+dnl branch in zero cycles, we use conditional branches for the additions.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ cal 3,-4(3) C bias res_ptr by -4; res limbs are then at 4(r3)
+ l 0,0(4) C load least significant s1 limb
+ cmpi 0,6,0 C cr0 = sign of s2_limb, tested by blt below
+ mtctr 5 C copy size into CTR
+ mul 9,0,6 C signed product: high limb in r9, low limb in MQ
+ srai 7,0,31 C r7 = all ones if s1 limb is negative, else 0
+ and 7,7,6 C r7 = s2_limb if s1 limb is negative, else 0
+ mfmq 11 C r11 = low limb of product
+ cax 9,9,7 C adjust high limb for negative limb from s1
+ l 7,4(3) C load res_limb
+ sf 8,11,7 C subtract low limb from res_limb
+ a 11,8,11 C invert cy (r11 is junk)
+ blt Lneg C s2_limb is negative: use the loop that adds s1 limbs
+Lpos: bdz Lend C single limb: cy_limb is in r9
+
+Lploop: lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C cr0 = sign of s1 limb, tested by bge below
+ mul 10,0,6 C signed product: high limb in r10, low limb in MQ
+ mfmq 0 C r0 = low limb of product
+ ae 11,0,9 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ aze 10,10 C propagate cy to new cy_limb
+ sf 8,11,7 C subtract from res_limb
+ a 11,8,11 C invert cy (r11 is junk)
+ bge Lp0 C skip the adjustment if s1 limb >= 0
+ cax 10,10,6 C adjust high limb for negative limb from s1
+Lp0: bdz Lend0 C done, with the cy_limb in r10
+ lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C cr0 = sign of s1 limb, tested by bge below
+ mul 9,0,6 C signed product: high limb in r9, low limb in MQ
+ mfmq 0 C r0 = low limb of product
+ ae 11,0,10 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ aze 9,9 C propagate cy to new cy_limb
+ sf 8,11,7 C subtract from res_limb
+ a 11,8,11 C invert cy (r11 is junk)
+ bge Lp1 C skip the adjustment if s1 limb >= 0
+ cax 9,9,6 C adjust high limb for negative limb from s1
+Lp1: bdn Lploop C decrement CTR and loop back
+
+ b Lend C done, with the cy_limb in r9
+
+Lneg: cax 9,9,0 C adjust high limb for negative s2_limb (add s1 limb)
+ bdz Lend C single limb: cy_limb is in r9
+Lnloop: lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C cr0 = sign of s1 limb, tested by bge below
+ mul 10,0,6 C signed product: high limb in r10, low limb in MQ
+ mfmq 7 C r7 = low limb of product (r0 keeps the s1 limb)
+ ae 11,7,9 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ ae 10,10,0 C add cy and s1 limb (s2_limb < 0) to new cy_limb
+ sf 8,11,7 C subtract from res_limb
+ a 11,8,11 C invert cy (r11 is junk)
+ bge Ln0 C skip the adjustment if s1 limb >= 0
+ cax 10,10,6 C adjust high limb for negative limb from s1
+Ln0: bdz Lend0 C done, with the cy_limb in r10
+ lu 0,4(4) C load s1 limb and update s1_ptr
+ stu 8,4(3) C store previous result limb, update res_ptr
+ cmpi 0,0,0 C cr0 = sign of s1 limb, tested by bge below
+ mul 9,0,6 C signed product: high limb in r9, low limb in MQ
+ mfmq 7 C r7 = low limb of product (r0 keeps the s1 limb)
+ ae 11,7,10 C low limb + old_cy_limb + old cy
+ l 7,4(3) C load res_limb
+ ae 9,9,0 C add cy and s1 limb (s2_limb < 0) to new cy_limb
+ sf 8,11,7 C subtract from res_limb
+ a 11,8,11 C invert cy (r11 is junk)
+ bge Ln1 C skip the adjustment if s1 limb >= 0
+ cax 9,9,6 C adjust high limb for negative limb from s1
+Ln1: bdn Lnloop C decrement CTR and loop back
+ b Lend C done, with the cy_limb in r9
+
+Lend0: cal 9,0(10) C move cy_limb from r10 to r9
+Lend: st 8,4(3) C store most significant result limb
+ aze 3,9 C return value = cy_limb + cy
+ br C return
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/power/submul_1.s b/mpn/power/submul_1.s
deleted file mode 100644
index 972bf876c..000000000
--- a/mpn/power/submul_1.s
+++ /dev/null
@@ -1,127 +0,0 @@
-# IBM POWER __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
-# the result from a second limb vector.
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# size r5
-# s2_limb r6
-
-# The POWER architecture has no unsigned 32x32->64 bit multiplication
-# instruction. To obtain that operation, we have to use the 32x32->64 signed
-# multiplication instruction, and add the appropriate compensation to the high
-# limb of the result. We add the multiplicand if the multiplier has its most
-# significant bit set, and we add the multiplier if the multiplicand has its
-# most significant bit set. We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work). Since the POWER architecture has a branch unit we
-# can branch in zero cycles, so that's how we perform the additions.
-
- .toc
- .globl __gmpn_submul_1
- .globl .__gmpn_submul_1
- .csect __gmpn_submul_1[DS]
-__gmpn_submul_1:
- .long .__gmpn_submul_1, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__gmpn_submul_1:
-
- cal 3,-4(3)
- l 0,0(4)
- cmpi 0,6,0
- mtctr 5
- mul 9,0,6
- srai 7,0,31
- and 7,7,6
- mfmq 11
- cax 9,9,7
- l 7,4(3)
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- blt Lneg
-Lpos: bdz Lend
-
-Lploop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 0
- ae 11,0,9 # low limb + old_cy_limb + old cy
- l 7,4(3)
- aze 10,10 # propagate cy to new cy_limb
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- bge Lp0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Lp0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 0
- ae 11,0,10
- l 7,4(3)
- aze 9,9
- sf 8,11,7
- a 11,8,11 # invert cy (r11 is junk)
- bge Lp1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Lp1: bdn Lploop
-
- b Lend
-
-Lneg: cax 9,9,0
- bdz Lend
-Lnloop: lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 10,0,6
- mfmq 7
- ae 11,7,9
- l 7,4(3)
- ae 10,10,0 # propagate cy to new cy_limb
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- bge Ln0
- cax 10,10,6 # adjust high limb for negative limb from s1
-Ln0: bdz Lend0
- lu 0,4(4)
- stu 8,4(3)
- cmpi 0,0,0
- mul 9,0,6
- mfmq 7
- ae 11,7,10
- l 7,4(3)
- ae 9,9,0 # propagate cy to new cy_limb
- sf 8,11,7 # add res_limb
- a 11,8,11 # invert cy (r11 is junk)
- bge Ln1
- cax 9,9,6 # adjust high limb for negative limb from s1
-Ln1: bdn Lnloop
- b Lend
-
-Lend0: cal 9,0(10)
-Lend: st 8,4(3)
- aze 3,9
- br
diff --git a/mpn/power/umul.asm b/mpn/power/umul.asm
new file mode 100644
index 000000000..82eb6ee8e
--- /dev/null
+++ b/mpn/power/umul.asm
@@ -0,0 +1,34 @@
+dnl Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ mul 9,4,5 C signed r4 * r5: high word in r9, low word in MQ
+ srai 0,4,31 C r0 = all ones if r4 is negative, else 0
+ and 0,0,5 C r0 = r5 if r4 is negative, else 0
+ srai 5,5,31 C r5 = all ones if r5 is negative, else 0
+ and 5,5,4 C r5 = r4 if r5 was negative, else 0
+ cax 0,0,5 C r0 = total compensation for the high word
+ mfmq 11 C r11 = low word of product
+ st 11,0(3) C store low word at the address in r3
+ cax 3,9,0 C return value = high word + compensation
+ br C return
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/power/umul.s b/mpn/power/umul.s
deleted file mode 100644
index f2f85503c..000000000
--- a/mpn/power/umul.s
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright 1999 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
- .toc
- .globl __umul_ppmm
- .globl .__umul_ppmm
- .csect __umul_ppmm[DS]
-__umul_ppmm:
- .long .__umul_ppmm, TOC[tc0], 0
- .csect .text[PR]
- .align 2
-.__umul_ppmm:
- mul 9,4,5
- srai 0,4,31
- and 0,0,5
- srai 5,5,31
- and 5,5,4
- cax 0,0,5
- mfmq 11
- st 11,0(3)
- cax 3,9,0
- br