Convert files to `.asm'.

Prefix umul_ppmm and sdiv_qrnnd. Update some comments.
author: tege <tege@gmplib.org> 2001-02-10 01:46:50 +0100
committer: tege <tege@gmplib.org> 2001-02-10 01:46:50 +0100
commit: f1f0a217a06cbdd9b541c8d84297aaa840a6ad93 (patch)
tree: 1833943d05f7316c17c6acf95019b43d0808fb41 /mpn/power
parent: ab64a1b0078eba23d2d9d44026dd05540e17db71 (diff)
download: gmp-f1f0a217a06cbdd9b541c8d84297aaa840a6ad93.tar.gz
18 files changed, 661 insertions, 699 deletions
diff --git a/mpn/power/add_n.asm b/mpn/power/add_n.asm
new file mode 100644
index 000000000..ef0d53080
--- /dev/null
+++ b/mpn/power/add_n.asm
@@ -0,0 +1,75 @@
+dnl  IBM POWER mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r3
+dnl  s1_ptr	r4
+dnl  s2_ptr	r5
+dnl  size	r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	andil.	10,6,1		C odd or even number of limbs?
+	l	8,0(4)		C load least significant s1 limb
+	l	0,0(5)		C load least significant s2 limb
+	cal	3,-4(3)		C offset res_ptr, it's updated before it's used
+	sri	10,6,1		C count for unrolled loop
+	a	7,0,8		C add least significant limbs, set cy
+	mtctr	10		C copy count into CTR
+	beq	0,Leven		C branch if even number of limbs (size >= 2)
+
+C We have an odd number of limbs.  Add the first limbs separately.
+	cmpi	1,10,0		C is count for unrolled loop zero?
+	bc	4,6,L1		C bne cr1,L1 (misassembled by gas)
+	st	7,4(3)		C size is 1: store the only result limb
+	aze	3,10		C return cy; use the fact that r10 is zero...
+	br			C return
+
+C We added least significant limbs.  Now reload the next limbs to enter loop.
+L1:	lu	8,4(4)		C load s1 limb and update s1_ptr
+	lu	0,4(5)		C load s2 limb and update s2_ptr
+	stu	7,4(3)		C store first result limb and update res_ptr
+	ae	7,0,8		C add limbs, set cy
+Leven:	lu	9,4(4)		C load s1 limb and update s1_ptr
+	lu	10,4(5)		C load s2 limb and update s2_ptr
+	bdz	Lend		C If done, skip loop
+
+Loop:	lu	8,4(4)		C load s1 limb and update s1_ptr
+	lu	0,4(5)		C load s2 limb and update s2_ptr
+	ae	11,10,9		C add previous limbs with cy, set cy
+	stu	7,4(3)		C store result limb and update res_ptr
+	lu	9,4(4)		C load s1 limb and update s1_ptr
+	lu	10,4(5)		C load s2 limb and update s2_ptr
+	ae	7,0,8		C add previous limbs with cy, set cy
+	stu	11,4(3)		C store result limb and update res_ptr
+	bdn	Loop		C decrement CTR and loop back
+
+Lend:	ae	11,10,9		C add limbs with cy, set cy
+	st	7,4(3)		C store 2:nd most significant result limb
+	st	11,8(3)		C store most significant result limb
+	lil	3,0		C load cy into ...
+	aze	3,3		C ... return value register
+	br			C return
+EPILOGUE(mpn_add_n)
diff --git a/mpn/power/add_n.s b/mpn/power/add_n.s
deleted file mode 100644
index 68d10e4d0..000000000
--- a/mpn/power/add_n.s
+++ /dev/null
@@ -1,79 +0,0 @@
-# IBM POWER __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
-
-# Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
-# Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# s2_ptr	r5
-# size		r6
-
-	.toc
-	.globl	__gmpn_add_n
-	.globl	.__gmpn_add_n
-	.csect	__gmpn_add_n[DS]
-__gmpn_add_n:
-	.long	.__gmpn_add_n, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__gmpn_add_n:
-	andil.	10,6,1		# odd or even number of limbs?
-	l	8,0(4)		# load least significant s1 limb
-	l	0,0(5)		# load least significant s2 limb
-	cal	3,-4(3)		# offset res_ptr, it's updated before it's used
-	sri	10,6,1		# count for unrolled loop
-	a	7,0,8		# add least significant limbs, set cy
-	mtctr	10		# copy count into CTR
-	beq	0,Leven		# branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs.  Add the first limbs separately.
-	cmpi	1,10,0		# is count for unrolled loop zero?
-	bc	4,6,L1		# bne cr1,L1 (misassembled by gas)
-	st	7,4(3)
-	aze	3,10		# use the fact that r10 is zero...
-	br			# return
-
-# We added least significant limbs.  Now reload the next limbs to enter loop.
-L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	stu	7,4(3)
-	ae	7,0,8		# add limbs, set cy
-Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	bdz	Lend		# If done, skip loop
-
-Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	ae	11,9,10		# add previous limbs with cy, set cy
-	stu	7,4(3)		# 
-	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	ae	7,0,8		# add previous limbs with cy, set cy
-	stu	11,4(3)		# 
-	bdn	Loop		# decrement CTR and loop back
-
-Lend:	ae	11,9,10		# add limbs with cy, set cy
-	st	7,4(3)		# 
-	st	11,8(3)		# 
-	lil	3,0		# load cy into ...
-	aze	3,3		# ... return value register
-	br
diff --git a/mpn/power/addmul_1.asm b/mpn/power/addmul_1.asm
new file mode 100644
index 000000000..1e1e358c3
--- /dev/null
+++ b/mpn/power/addmul_1.asm
@@ -0,0 +1,117 @@
+dnl  IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r3
+dnl  s1_ptr	r4
+dnl  size	r5
+dnl  s2_limb	r6
+
+dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl  instruction.  To obtain that operation, we have to use the 32x32->64
+dnl  signed multiplication instruction, and add the appropriate compensation to
+dnl  the high limb of the result.  We add the multiplicand if the multiplier
+dnl  has its most significant bit set, and we add the multiplier if the
+dnl  multiplicand has its most significant bit set.  We need to preserve the
+dnl  carry flag between each iteration, so we have to compute the compensation
+dnl  carefully (the natural, srai+and doesn't work).  Since all POWER CPUs can
+dnl  branch in zero cycles, we use conditional branches for the additions.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+	cal	3,-4(3)		C offset res_ptr, it's updated before it's used
+	l	0,0(4)		C load least significant s1 limb
+	cmpi	0,6,0		C cr0 = sign of s2_limb, tested by blt below
+	mtctr	5		C copy size into CTR
+	mul	9,0,6		C signed multiply: high word in r9, low in MQ
+	srai	7,0,31		C r7 = sign mask of the s1 limb
+	and	7,7,6		C r7 = s2_limb if the s1 limb is negative, else 0
+	mfmq	8		C r8 = low product word
+	cax	9,9,7		C adjust high limb for negative limb from s1
+	l	7,4(3)		C load res_limb
+	a	8,8,7		C add res_limb
+	blt	Lneg		C branch if s2_limb is negative
+Lpos:	bdz	Lend		C if just one limb, skip loop
+
+Lploop:	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	10,0,6		C high word in r10 (new cy_limb), low in MQ
+	mfmq	0		C r0 = low product word
+	ae	8,0,9		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	aze	10,10		C propagate cy to new cy_limb
+	a	8,8,7		C add res_limb
+	bge	Lp0		C skip adjustment if the s1 limb is non-negative
+	cax	10,10,6		C adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0		C done; the final cy_limb is in r10
+	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	9,0,6		C high word in r9 (new cy_limb), low in MQ
+	mfmq	0		C r0 = low product word
+	ae	8,0,10		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	aze	9,9		C propagate cy to new cy_limb
+	a	8,8,7		C add res_limb
+	bge	Lp1		C skip adjustment if the s1 limb is non-negative
+	cax	9,9,6		C adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop		C decrement CTR and loop back
+
+	b	Lend		C done; the final cy_limb is in r9
+
+Lneg:	cax	9,9,0		C adjust high limb for negative s2_limb
+	bdz	Lend		C if just one limb, skip loop
+Lnloop:	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	10,0,6		C high word in r10 (new cy_limb), low in MQ
+	mfmq	7		C r7 = low product word (r0 keeps the s1 limb)
+	ae	8,7,9		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	ae	10,10,0		C add s1 limb (negative s2_limb) and propagate cy
+	a	8,8,7		C add res_limb
+	bge	Ln0		C skip adjustment if the s1 limb is non-negative
+	cax	10,10,6		C adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0		C done; the final cy_limb is in r10
+	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	9,0,6		C high word in r9 (new cy_limb), low in MQ
+	mfmq	7		C r7 = low product word (r0 keeps the s1 limb)
+	ae	8,7,10		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	ae	9,9,0		C add s1 limb (negative s2_limb) and propagate cy
+	a	8,8,7		C add res_limb
+	bge	Ln1		C skip adjustment if the s1 limb is non-negative
+	cax	9,9,6		C adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop		C decrement CTR and loop back
+	b	Lend		C done; the final cy_limb is in r9
+
+Lend0:	cal	9,0(10)		C copy the final cy_limb from r10 to r9
+Lend:	st	8,4(3)		C store most significant result limb
+	aze	3,9		C return cy_limb plus the final cy
+	br			C return
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/power/addmul_1.s b/mpn/power/addmul_1.s
deleted file mode 100644
index aefbedc24..000000000
--- a/mpn/power/addmul_1.s
+++ /dev/null
@@ -1,122 +0,0 @@
-# IBM POWER __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
-# the result to a second limb vector.
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# size		r5
-# s2_limb	r6
-
-# The POWER architecture has no unsigned 32x32->64 bit multiplication
-# instruction.  To obtain that operation, we have to use the 32x32->64 signed
-# multiplication instruction, and add the appropriate compensation to the high
-# limb of the result.  We add the multiplicand if the multiplier has its most
-# significant bit set, and we add the multiplier if the multiplicand has its
-# most significant bit set.  We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work).  Since the POWER architecture has a branch unit we
-# can branch in zero cycles, so that's how we perform the additions.
-
-	.toc
-	.globl	__gmpn_addmul_1
-	.globl	.__gmpn_addmul_1
-	.csect	__gmpn_addmul_1[DS]
-__gmpn_addmul_1:
-	.long	.__gmpn_addmul_1, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__gmpn_addmul_1:
-
-	cal	3,-4(3)
-	l	0,0(4)
-	cmpi	0,6,0
-	mtctr	5
-	mul	9,0,6
-	srai	7,0,31
-	and	7,7,6
-	mfmq	8
-	cax	9,9,7
-	l	7,4(3)
-	a	8,8,7		# add res_limb
-	blt	Lneg
-Lpos:	bdz	Lend
-
-Lploop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	0
-	ae	8,0,9		# low limb + old_cy_limb + old cy
-	l	7,4(3)
-	aze	10,10		# propagate cy to new cy_limb
-	a	8,8,7		# add res_limb
-	bge	Lp0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Lp0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	0
-	ae	8,0,10
-	l	7,4(3)
-	aze	9,9
-	a	8,8,7
-	bge	Lp1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Lp1:	bdn	Lploop
-
-	b	Lend
-
-Lneg:	cax	9,9,0
-	bdz	Lend
-Lnloop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	7
-	ae	8,7,9
-	l	7,4(3)
-	ae	10,10,0		# propagate cy to new cy_limb
-	a	8,8,7		# add res_limb
-	bge	Ln0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Ln0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	7
-	ae	8,7,10
-	l	7,4(3)
-	ae	9,9,0		# propagate cy to new cy_limb
-	a	8,8,7		# add res_limb
-	bge	Ln1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Ln1:	bdn	Lnloop
-	b	Lend
-
-Lend0:	cal	9,0(10)
-Lend:	st	8,4(3)
-	aze	3,9
-	br
diff --git a/mpn/power/lshift.asm b/mpn/power/lshift.asm
new file mode 100644
index 000000000..c5358ff0f
--- /dev/null
+++ b/mpn/power/lshift.asm
@@ -0,0 +1,52 @@
+dnl  IBM POWER mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r3
+dnl  s_ptr	r4
+dnl  size	r5
+dnl  cnt	r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	sli	0,5,2		C r0 = 4 * size, the vector length in bytes
+	cax	9,3,0		C r9 = res_ptr + 4*size, just past the top limb
+	cax	4,4,0		C s_ptr += 4*size, just past the top limb
+	sfi	8,6,32		C r8 = 32 - cnt, the complementary shift count
+	mtctr	5		C put limb count in CTR loop register
+	lu	0,-4(4)		C read most significant limb
+	sre	3,0,8		C compute carry out limb, and init MQ register
+	bdz	Lend2		C if just one limb, skip loop
+	lu	0,-4(4)		C read 2:nd most significant limb
+	sreq	7,0,8		C compute most significant limb of result
+	bdz	Lend		C if just two limbs, skip loop
+Loop:	lu	0,-4(4)		C load next lower limb
+	stu	7,-4(9)		C store previous result during read latency
+	sreq	7,0,8		C compute result limb
+	bdn	Loop		C loop back until CTR is zero
+Lend:	stu	7,-4(9)		C store 2:nd least significant limb
+Lend2:	sle	7,0,6		C compute least significant limb
+	st      7,-4(9)		C store it
+	br			C return; r3 holds the carry out limb
+EPILOGUE(mpn_lshift)
diff --git a/mpn/power/lshift.s b/mpn/power/lshift.s
deleted file mode 100644
index fd2576476..000000000
--- a/mpn/power/lshift.s
+++ /dev/null
@@ -1,56 +0,0 @@
-# IBM POWER __gmpn_lshift -- 
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s_ptr		r4
-# size		r5
-# cnt		r6
-
-	.toc
-	.globl	__gmpn_lshift
-	.globl	.__gmpn_lshift
-	.csect	__gmpn_lshift[DS]
-__gmpn_lshift:
-	.long	.__gmpn_lshift, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__gmpn_lshift:
-	sli	0,5,2
-	cax	9,3,0
-	cax	4,4,0
-	sfi	8,6,32
-	mtctr	5		# put limb count in CTR loop register
-	lu	0,-4(4)		# read most significant limb
-	sre	3,0,8		# compute carry out limb, and init MQ register
-	bdz	Lend2		# if just one limb, skip loop
-	lu	0,-4(4)		# read 2:nd most significant limb
-	sreq	7,0,8		# compute most significant limb of result
-	bdz	Lend		# if just two limb, skip loop
-Loop:	lu	0,-4(4)		# load next lower limb
-	stu	7,-4(9)		# store previous result during read latency
-	sreq	7,0,8		# compute result limb
-	bdn	Loop		# loop back until CTR is zero
-Lend:	stu	7,-4(9)		# store 2:nd least significant limb
-Lend2:	sle	7,0,6		# compute least significant limb
-	st      7,-4(9)		# store it"				\
-	br
diff --git a/mpn/power/mul_1.asm b/mpn/power/mul_1.asm
new file mode 100644
index 000000000..bdf009906
--- /dev/null
+++ b/mpn/power/mul_1.asm
@@ -0,0 +1,104 @@
+dnl  IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl  result in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r3
+dnl  s1_ptr	r4
+dnl  size	r5
+dnl  s2_limb	r6
+
+dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl  instruction.  To obtain that operation, we have to use the 32x32->64
+dnl  signed multiplication instruction, and add the appropriate compensation to
+dnl  the high limb of the result.  We add the multiplicand if the multiplier
+dnl  has its most significant bit set, and we add the multiplier if the
+dnl  multiplicand has its most significant bit set.  We need to preserve the
+dnl  carry flag between each iteration, so we have to compute the compensation
+dnl  carefully (the natural, srai+and doesn't work).  Since all POWER CPUs can
+dnl  branch in zero cycles, we use conditional branches for the additions.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+	cal	3,-4(3)		C offset res_ptr, it's updated before it's used
+	l	0,0(4)		C load least significant s1 limb
+	cmpi	0,6,0		C cr0 = sign of s2_limb, tested by blt below
+	mtctr	5		C copy size into CTR
+	mul	9,0,6		C signed multiply: high word in r9, low in MQ
+	srai	7,0,31		C r7 = sign mask of the s1 limb
+	and	7,7,6		C r7 = s2_limb if the s1 limb is negative, else 0
+	mfmq	8		C r8 = low product word
+	ai	0,0,0		C reset carry
+	cax	9,9,7		C adjust high limb for negative limb from s1
+	blt	Lneg		C branch if s2_limb is negative
+Lpos:	bdz	Lend		C if just one limb, skip loop
+Lploop:	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	10,0,6		C high word in r10 (new cy_limb), low in MQ
+	mfmq	0		C r0 = low product word
+	ae	8,0,9		C low limb + old_cy_limb + old cy
+	bge	Lp0		C skip adjustment if the s1 limb is non-negative
+	cax	10,10,6		C adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0		C done; the final cy_limb is in r10
+	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	9,0,6		C high word in r9 (new cy_limb), low in MQ
+	mfmq	0		C r0 = low product word
+	ae	8,0,10		C low limb + old_cy_limb + old cy
+	bge	Lp1		C skip adjustment if the s1 limb is non-negative
+	cax	9,9,6		C adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop		C decrement CTR and loop back
+	b	Lend		C done; the final cy_limb is in r9
+
+Lneg:	cax	9,9,0		C adjust high limb for negative s2_limb
+	bdz	Lend		C if just one limb, skip loop
+Lnloop:	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	10,0,6		C high word in r10 (new cy_limb), low in MQ
+	cax	10,10,0		C adjust high limb for negative s2_limb
+	mfmq	0		C r0 = low product word
+	ae	8,0,9		C low limb + old_cy_limb + old cy
+	bge	Ln0		C skip adjustment if the s1 limb is non-negative
+	cax	10,10,6		C adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0		C done; the final cy_limb is in r10
+	lu	0,4(4)		C load next s1 limb and update s1_ptr
+	stu	8,4(3)		C store result limb and update res_ptr
+	cmpi	0,0,0		C cr0 = sign of the s1 limb
+	mul	9,0,6		C high word in r9 (new cy_limb), low in MQ
+	cax	9,9,0		C adjust high limb for negative s2_limb
+	mfmq	0		C r0 = low product word
+	ae	8,0,10		C low limb + old_cy_limb + old cy
+	bge	Ln1		C skip adjustment if the s1 limb is non-negative
+	cax	9,9,6		C adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop		C decrement CTR and loop back
+	b	Lend		C done; the final cy_limb is in r9
+
+Lend0:	cal	9,0(10)		C copy the final cy_limb from r10 to r9
+Lend:	st	8,4(3)		C store most significant result limb
+	aze	3,9		C return cy_limb plus the final cy
+	br			C return
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/power/mul_1.s b/mpn/power/mul_1.s
deleted file mode 100644
index 61869437e..000000000
--- a/mpn/power/mul_1.s
+++ /dev/null
@@ -1,109 +0,0 @@
-# IBM POWER __gmpn_mul_1 -- Multiply a limb vector with a limb and store
-# the result in a second limb vector.
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# size		r5
-# s2_limb	r6
-
-# The POWER architecture has no unsigned 32x32->64 bit multiplication
-# instruction.  To obtain that operation, we have to use the 32x32->64 signed
-# multiplication instruction, and add the appropriate compensation to the high
-# limb of the result.  We add the multiplicand if the multiplier has its most
-# significant bit set, and we add the multiplier if the multiplicand has its
-# most significant bit set.  We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work).  Since the POWER architecture has a branch unit we
-# can branch in zero cycles, so that's how we perform the additions.
-
-	.toc
-	.globl	__gmpn_mul_1
-	.globl	.__gmpn_mul_1
-	.csect	__gmpn_mul_1[DS]
-__gmpn_mul_1:
-	.long	.__gmpn_mul_1, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__gmpn_mul_1:
-
-	cal	3,-4(3)
-	l	0,0(4)
-	cmpi	0,6,0
-	mtctr	5
-	mul	9,0,6
-	srai	7,0,31
-	and	7,7,6
-	mfmq	8
-	ai	0,0,0		# reset carry
-	cax	9,9,7
-	blt	Lneg
-Lpos:	bdz	Lend
-Lploop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	0
-	ae	8,0,9
-	bge	Lp0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Lp0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	0
-	ae	8,0,10
-	bge	Lp1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Lp1:	bdn	Lploop
-	b	Lend
-
-Lneg:	cax	9,9,0
-	bdz	Lend
-Lnloop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	cax	10,10,0		# adjust high limb for negative s2_limb
-	mfmq	0
-	ae	8,0,9
-	bge	Ln0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Ln0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	cax	9,9,0		# adjust high limb for negative s2_limb
-	mfmq	0
-	ae	8,0,10
-	bge	Ln1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Ln1:	bdn	Lnloop
-	b	Lend
-
-Lend0:	cal	9,0(10)
-Lend:	st	8,4(3)
-	aze	3,9
-	br
diff --git a/mpn/power/rshift.asm b/mpn/power/rshift.asm
new file mode 100644
index 000000000..2b8c07d0e
--- /dev/null
+++ b/mpn/power/rshift.asm
@@ -0,0 +1,50 @@
+dnl  IBM POWER mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r3
+dnl  s_ptr	r4
+dnl  size	r5
+dnl  cnt	r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	sfi	8,6,32		C r8 = 32 - cnt, the complementary shift count
+	mtctr	5		C put limb count in CTR loop register
+	l	0,0(4)		C read least significant limb
+	ai	9,3,-4		C adjust res_ptr since it's offset in the stu:s
+	sle	3,0,8		C compute carry limb, and init MQ register
+	bdz	Lend2		C if just one limb, skip loop
+	lu	0,4(4)		C read 2:nd least significant limb
+	sleq	7,0,8		C compute least significant limb of result
+	bdz	Lend		C if just two limbs, skip loop
+Loop:	lu	0,4(4)		C load next higher limb
+	stu	7,4(9)		C store previous result during read latency
+	sleq	7,0,8		C compute result limb
+	bdn	Loop		C loop back until CTR is zero
+Lend:	stu	7,4(9)		C store 2:nd most significant limb
+Lend2:	sre	7,0,6		C compute most significant limb
+	st      7,4(9)		C store it
+	br			C return; r3 holds the carry limb
+EPILOGUE(mpn_rshift)
diff --git a/mpn/power/rshift.s b/mpn/power/rshift.s
deleted file mode 100644
index e95cf7dca..000000000
--- a/mpn/power/rshift.s
+++ /dev/null
@@ -1,54 +0,0 @@
-# IBM POWER __gmpn_rshift -- 
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s_ptr		r4
-# size		r5
-# cnt		r6
-
-	.toc
-	.globl	__gmpn_rshift
-	.globl	.__gmpn_rshift
-	.csect	__gmpn_rshift[DS]
-__gmpn_rshift:
-	.long	.__gmpn_rshift, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__gmpn_rshift:
-	sfi	8,6,32
-	mtctr	5		# put limb count in CTR loop register
-	l	0,0(4)		# read least significant limb
-	ai	9,3,-4		# adjust res_ptr since it's offset in the stu:s
-	sle	3,0,8		# compute carry limb, and init MQ register
-	bdz	Lend2		# if just one limb, skip loop
-	lu	0,4(4)		# read 2:nd least significant limb
-	sleq	7,0,8		# compute least significant limb of result
-	bdz	Lend		# if just two limb, skip loop
-Loop:	lu	0,4(4)		# load next higher limb
-	stu	7,4(9)		# store previous result during read latency
-	sleq	7,0,8		# compute result limb
-	bdn	Loop		# loop back until CTR is zero
-Lend:	stu	7,4(9)		# store 2:nd most significant limb
-Lend2:	sre	7,0,6		# compute most significant limb
-	st      7,4(9)		# store it"				\
-	br
diff --git a/mpn/power/sdiv.asm b/mpn/power/sdiv.asm
new file mode 100644
index 000000000..75bcbb790
--- /dev/null
+++ b/mpn/power/sdiv.asm
@@ -0,0 +1,30 @@
+dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sdiv_qrnnd)
+	mtmq	5		C MQ = r5, the low word of the dividend
+	div	0,4,6		C divide r4:MQ by r6; quotient in r0, rem in MQ
+	mfmq	9		C r9 = remainder left in MQ by div
+	st	9,0(3)		C store the remainder through the pointer in r3
+	mr	3,0		C return the quotient
+	br			C return
+EPILOGUE(mpn_sdiv_qrnnd)
diff --git a/mpn/power/sdiv.s b/mpn/power/sdiv.s
deleted file mode 100644
index a6ca4246e..000000000
--- a/mpn/power/sdiv.s
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 1999 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-	.toc
-	.globl	__sdiv_qrnnd
-	.globl	.__sdiv_qrnnd
-	.csect	__sdiv_qrnnd[DS]
-__sdiv_qrnnd:
-	.long	.__sdiv_qrnnd, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__sdiv_qrnnd:
-	mtmq	5
-	div	0,4,6
-	mfmq	9
-	st	9,0(3)
-	mr	3,0
-	br
diff --git a/mpn/power/sub_n.asm b/mpn/power/sub_n.asm
new file mode 100644
index 000000000..e4e9892db
--- /dev/null
+++ b/mpn/power/sub_n.asm
@@ -0,0 +1,77 @@
+dnl  IBM POWER mpn_sub_n -- Subtract two limb vectors of equal, non-zero
+dnl  length.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r3
+dnl  s1_ptr	r4
+dnl  s2_ptr	r5
+dnl  size	r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	andil.	10,6,1		C odd or even number of limbs?
+	l	8,0(4)		C load least significant s1 limb
+	l	0,0(5)		C load least significant s2 limb
+	cal	3,-4(3)		C offset res_ptr, it's updated before it's used
+	sri	10,6,1		C count for unrolled loop
+	sf	7,0,8		C subtract least significant limbs, set cy
+	mtctr	10		C copy count into CTR
+	beq	0,Leven		C branch if even number of limbs (size >= 2)
+
+C We have an odd number of limbs.  Subtract the first limbs separately.
+	cmpi	1,10,0		C is count for unrolled loop zero?
+	bc	4,6,L1		C bne cr1,L1 (misassembled by gas)
+	st	7,4(3)		C size is 1: store the only result limb
+	sfe	3,0,0		C load !cy into ...
+	sfi	3,3,0		C ... return value register
+	br			C return
+
+C Least significant limbs are subtracted.  Now reload next limbs to enter loop.
+L1:	lu	8,4(4)		C load s1 limb and update s1_ptr
+	lu	0,4(5)		C load s2 limb and update s2_ptr
+	stu	7,4(3)		C store first result limb and update res_ptr
+	sfe	7,0,8		C subtract limbs, set cy
+Leven:	lu	9,4(4)		C load s1 limb and update s1_ptr
+	lu	10,4(5)		C load s2 limb and update s2_ptr
+	bdz	Lend		C If done, skip loop
+
+Loop:	lu	8,4(4)		C load s1 limb and update s1_ptr
+	lu	0,4(5)		C load s2 limb and update s2_ptr
+	sfe	11,10,9		C subtract previous limbs with cy, set cy
+	stu	7,4(3)		C store result limb and update res_ptr
+	lu	9,4(4)		C load s1 limb and update s1_ptr
+	lu	10,4(5)		C load s2 limb and update s2_ptr
+	sfe	7,0,8		C subtract previous limbs with cy, set cy
+	stu	11,4(3)		C store result limb and update res_ptr
+	bdn	Loop		C decrement CTR and loop back
+
+Lend:	sfe	11,10,9		C subtract limbs with cy, set cy
+	st	7,4(3)		C store 2:nd most significant result limb
+	st	11,8(3)		C store most significant result limb
+	sfe	3,0,0		C load !cy into ...
+	sfi	3,3,0		C ... return value register
+	br			C return
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/power/sub_n.s b/mpn/power/sub_n.s
deleted file mode 100644
index a8ecd204f..000000000
--- a/mpn/power/sub_n.s
+++ /dev/null
@@ -1,80 +0,0 @@
-# IBM POWER __gmpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
-
-# Copyright 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
-# Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# s2_ptr	r5
-# size		r6
-
-	.toc
-	.globl	__gmpn_sub_n
-	.globl	.__gmpn_sub_n
-	.csect	__gmpn_sub_n[DS]
-__gmpn_sub_n:
-	.long	.__gmpn_sub_n, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__gmpn_sub_n:
-	andil.	10,6,1		# odd or even number of limbs?
-	l	8,0(4)		# load least significant s1 limb
-	l	0,0(5)		# load least significant s2 limb
-	cal	3,-4(3)		# offset res_ptr, it's updated before it's used
-	sri	10,6,1		# count for unrolled loop
-	sf	7,0,8		# subtract least significant limbs, set cy
-	mtctr	10		# copy count into CTR
-	beq	0,Leven		# branch if even # of limbs (# of limbs >= 2)
-
-# We have an odd # of limbs.  Add the first limbs separately.
-	cmpi	1,10,0		# is count for unrolled loop zero?
-	bc	4,6,L1		# bne cr1,L1 (misassembled by gas)
-	st	7,4(3)
-	sfe	3,0,0		# load !cy into ...
-	sfi	3,3,0		# ... return value register
-	br			# return
-
-# We added least significant limbs.  Now reload the next limbs to enter loop.
-L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	stu	7,4(3)
-	sfe	7,0,8		# subtract limbs, set cy
-Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	bdz	Lend		# If done, skip loop
-
-Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
-	lu	0,4(5)		# load s2 limb and update s2_ptr
-	sfe	11,10,9		# subtract previous limbs with cy, set cy
-	stu	7,4(3)		# 
-	lu	9,4(4)		# load s1 limb and update s1_ptr
-	lu	10,4(5)		# load s2 limb and update s2_ptr
-	sfe	7,0,8		# subtract previous limbs with cy, set cy
-	stu	11,4(3)		# 
-	bdn	Loop		# decrement CTR and loop back
-
-Lend:	sfe	11,10,9		# subtract limbs with cy, set cy
-	st	7,4(3)		# 
-	st	11,8(3)		# 
-	sfe	3,0,0		# load !cy into ...
-	sfi	3,3,0		# ... return value register
-	br
diff --git a/mpn/power/submul_1.asm b/mpn/power/submul_1.asm
new file mode 100644
index 000000000..70f685ac2
--- /dev/null
+++ b/mpn/power/submul_1.asm
@@ -0,0 +1,122 @@
+dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r3
+dnl  s1_ptr	r4
+dnl  size	r5
+dnl  s2_limb	r6
+
+dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl  instruction.  To obtain that operation, we have to use the 32x32->64
+dnl  signed multiplication instruction, and add the appropriate compensation to
+dnl  the high limb of the result.  We add the multiplicand if the multiplier
+dnl  has its most significant bit set, and we add the multiplier if the
+dnl  multiplicand has its most significant bit set.  We need to preserve the
+dnl  carry flag between each iteration, so we have to compute the compensation
+dnl  carefully (the natural srai+and doesn't work).  Since all POWER CPUs can
+dnl  branch in zero cycles, we use conditional branches for the additions.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+	cal	3,-4(3)		C offset res_ptr so that 4(3) addresses the current res limb
+	l	0,0(4)		C load least significant s1 limb
+	cmpi	0,6,0		C is s2_limb negative?  (tested by blt below)
+	mtctr	5		C copy size into CTR
+	mul	9,0,6		C signed multiply, high limb to r9, low limb to MQ
+	srai	7,0,31		C r7 = all ones if s1 limb is negative, else 0
+	and	7,7,6		C r7 = s2_limb if s1 limb is negative, else 0
+	mfmq	11		C fetch low limb of product
+	cax	9,9,7		C adjust high limb for negative limb from s1
+	l	7,4(3)		C load res_limb
+	sf	8,11,7		C subtract low limb from res_limb, set cy
+	a	11,8,11		C invert cy (r11 is junk)
+	blt	Lneg		C s2_limb is negative, use the loop that compensates for it
+Lpos:	bdz	Lend		C If done, skip loop
+
+Lploop:	lu	0,4(4)		C load s1 limb and update s1_ptr
+	stu	8,4(3)		C store previous result limb and update res_ptr
+	cmpi	0,0,0		C is s1 limb negative?  (tested by bge below)
+	mul	10,0,6		C signed multiply, high limb to r10, low limb to MQ
+	mfmq	0		C fetch low limb of product
+	ae	11,0,9		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	aze	10,10		C propagate cy to new cy_limb
+	sf	8,11,7		C subtract from res_limb, set cy
+	a	11,8,11		C invert cy (r11 is junk)
+	bge	Lp0		C skip adjustment if s1 limb is non-negative
+	cax	10,10,6		C adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0		C If done, leave loop with cy_limb in r10
+	lu	0,4(4)		C load s1 limb and update s1_ptr
+	stu	8,4(3)		C store previous result limb and update res_ptr
+	cmpi	0,0,0		C is s1 limb negative?  (tested by bge below)
+	mul	9,0,6		C signed multiply, high limb to r9, low limb to MQ
+	mfmq	0		C fetch low limb of product
+	ae	11,0,10		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	aze	9,9		C propagate cy to new cy_limb
+	sf	8,11,7		C subtract from res_limb, set cy
+	a	11,8,11		C invert cy (r11 is junk)
+	bge	Lp1		C skip adjustment if s1 limb is non-negative
+	cax	9,9,6		C adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop		C decrement CTR and loop back
+
+	b	Lend		C done, cy_limb is in r9
+
+Lneg:	cax	9,9,0		C adjust high limb for negative s2_limb (add s1 limb)
+	bdz	Lend		C If done, skip loop
+Lnloop:	lu	0,4(4)		C load s1 limb and update s1_ptr
+	stu	8,4(3)		C store previous result limb and update res_ptr
+	cmpi	0,0,0		C is s1 limb negative?  (tested by bge below)
+	mul	10,0,6		C signed multiply, high limb to r10, low limb to MQ
+	mfmq	7		C fetch low limb of product (r0 keeps the s1 limb)
+	ae	11,7,9		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	ae	10,10,0		C propagate cy to new cy_limb, add s1 limb for negative s2_limb
+	sf	8,11,7		C subtract from res_limb, set cy
+	a	11,8,11		C invert cy (r11 is junk)
+	bge	Ln0		C skip adjustment if s1 limb is non-negative
+	cax	10,10,6		C adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0		C If done, leave loop with cy_limb in r10
+	lu	0,4(4)		C load s1 limb and update s1_ptr
+	stu	8,4(3)		C store previous result limb and update res_ptr
+	cmpi	0,0,0		C is s1 limb negative?  (tested by bge below)
+	mul	9,0,6		C signed multiply, high limb to r9, low limb to MQ
+	mfmq	7		C fetch low limb of product (r0 keeps the s1 limb)
+	ae	11,7,10		C low limb + old_cy_limb + old cy
+	l	7,4(3)		C load res_limb
+	ae	9,9,0		C propagate cy to new cy_limb, add s1 limb for negative s2_limb
+	sf	8,11,7		C subtract from res_limb, set cy
+	a	11,8,11		C invert cy (r11 is junk)
+	bge	Ln1		C skip adjustment if s1 limb is non-negative
+	cax	9,9,6		C adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop		C decrement CTR and loop back
+	b	Lend		C done, cy_limb is in r9
+
+Lend0:	cal	9,0(10)		C move cy_limb from r10 to r9
+Lend:	st	8,4(3)		C store most significant result limb
+	aze	3,9		C return cy_limb + cy
+	br			C return
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/power/submul_1.s b/mpn/power/submul_1.s
deleted file mode 100644
index 972bf876c..000000000
--- a/mpn/power/submul_1.s
+++ /dev/null
@@ -1,127 +0,0 @@
-# IBM POWER __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
-# the result from a second limb vector.
-
-# Copyright 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# size		r5
-# s2_limb	r6
-
-# The POWER architecture has no unsigned 32x32->64 bit multiplication
-# instruction.  To obtain that operation, we have to use the 32x32->64 signed
-# multiplication instruction, and add the appropriate compensation to the high
-# limb of the result.  We add the multiplicand if the multiplier has its most
-# significant bit set, and we add the multiplier if the multiplicand has its
-# most significant bit set.  We need to preserve the carry flag between each
-# iteration, so we have to compute the compensation carefully (the natural,
-# srai+and doesn't work).  Since the POWER architecture has a branch unit we
-# can branch in zero cycles, so that's how we perform the additions.
-
-	.toc
-	.globl	__gmpn_submul_1
-	.globl	.__gmpn_submul_1
-	.csect	__gmpn_submul_1[DS]
-__gmpn_submul_1:
-	.long	.__gmpn_submul_1, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__gmpn_submul_1:
-
-	cal	3,-4(3)
-	l	0,0(4)
-	cmpi	0,6,0
-	mtctr	5
-	mul	9,0,6
-	srai	7,0,31
-	and	7,7,6
-	mfmq	11
-	cax	9,9,7
-	l	7,4(3)
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	blt	Lneg
-Lpos:	bdz	Lend
-
-Lploop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	0
-	ae	11,0,9		# low limb + old_cy_limb + old cy
-	l	7,4(3)
-	aze	10,10		# propagate cy to new cy_limb
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Lp0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Lp0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	0
-	ae	11,0,10
-	l	7,4(3)
-	aze	9,9
-	sf	8,11,7
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Lp1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Lp1:	bdn	Lploop
-
-	b	Lend
-
-Lneg:	cax	9,9,0
-	bdz	Lend
-Lnloop:	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	10,0,6
-	mfmq	7
-	ae	11,7,9
-	l	7,4(3)
-	ae	10,10,0		# propagate cy to new cy_limb
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Ln0
-	cax	10,10,6		# adjust high limb for negative limb from s1
-Ln0:	bdz	Lend0
-	lu	0,4(4)
-	stu	8,4(3)
-	cmpi	0,0,0
-	mul	9,0,6
-	mfmq	7
-	ae	11,7,10
-	l	7,4(3)
-	ae	9,9,0		# propagate cy to new cy_limb
-	sf	8,11,7		# add res_limb
-	a	11,8,11		# invert cy (r11 is junk)
-	bge	Ln1
-	cax	9,9,6		# adjust high limb for negative limb from s1
-Ln1:	bdn	Lnloop
-	b	Lend
-
-Lend0:	cal	9,0(10)
-Lend:	st	8,4(3)
-	aze	3,9
-	br
diff --git a/mpn/power/umul.asm b/mpn/power/umul.asm
new file mode 100644
index 000000000..82eb6ee8e
--- /dev/null
+++ b/mpn/power/umul.asm
@@ -0,0 +1,34 @@
+dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+	mul	9,4,5		C signed multiply of r4 and r5, high word to r9, low word to MQ
+	srai	0,4,31		C r0 = all ones if r4 has its most significant bit set, else 0
+	and	0,0,5		C r0 = r5 if r4 has its most significant bit set, else 0
+	srai	5,5,31		C r5 = all ones if r5 has its most significant bit set, else 0
+	and	5,5,4		C r5 = r4 if r5 had its most significant bit set, else 0
+	cax	0,0,5		C r0 = total compensation for the signed multiply
+	mfmq	11		C fetch low word of product
+	st	11,0(3)		C store low word at the address passed in r3
+	cax	3,9,0		C return high word plus compensation
+	br			C return
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/power/umul.s b/mpn/power/umul.s
deleted file mode 100644
index f2f85503c..000000000
--- a/mpn/power/umul.s
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright 1999 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-	.toc
-	.globl	__umul_ppmm
-	.globl	.__umul_ppmm
-	.csect	__umul_ppmm[DS]
-__umul_ppmm:
-	.long	.__umul_ppmm, TOC[tc0], 0
-	.csect	.text[PR]
-	.align	2
-.__umul_ppmm:
-	mul	9,4,5
-	srai	0,4,31
-	and	0,0,5
-	srai	5,5,31
-	and	5,5,4
-	cax	0,0,5
-	mfmq	11
-	st	11,0(3)
-	cax	3,9,0
-	br
author	tege <tege@gmplib.org>	2001-02-10 01:46:50 +0100
committer	tege <tege@gmplib.org>	2001-02-10 01:46:50 +0100
commit	f1f0a217a06cbdd9b541c8d84297aaa840a6ad93 (patch)
tree	1833943d05f7316c17c6acf95019b43d0808fb41 /mpn/power
parent	ab64a1b0078eba23d2d9d44026dd05540e17db71 (diff)
download	gmp-f1f0a217a06cbdd9b541c8d84297aaa840a6ad93.tar.gz