* Convert `.s' files to `.asm'.

* Port to Cray T3D/E systems.
author: tege <tege@gmplib.org> 2000-03-17 07:08:03 +0100
committer: tege <tege@gmplib.org> 2000-03-17 07:08:03 +0100
commit: 106b678e54ca07182002077f4421890710626aa4 (patch)
tree: d3fe95a81926ddd8bc57a083ec50485ffaffee65 /mpn/alpha
parent: ef6177983d51165d7e0ef15a52b63fea72169805 (diff)
download: gmp-106b678e54ca07182002077f4421890710626aa4.tar.gz
27 files changed, 1489 insertions, 1527 deletions
diff --git a/mpn/alpha/README b/mpn/alpha/README
index fd9f78c79..a03153922 100644
--- a/mpn/alpha/README
+++ b/mpn/alpha/README
@@ -1,5 +1,15 @@
 This directory contains mpn functions optimized for DEC Alpha processors.
 
+ALPHA ASSEMBLY RULES AND REGULATIONS
+
+The `.prologue N' pseudo op marks the end of the instructions that need
+special handling by unwinding.  It also says whether $27 is really
+needed for computing the gp.  The `.mask M' pseudo op says which
+registers are saved on the stack, and at what offset in the frame.
+
+Cray code is very very different...
+
+
 RELEVANT OPTIMIZATION ISSUES
 
 EV4
diff --git a/mpn/alpha/add_n.asm b/mpn/alpha/add_n.asm
new file mode 100644
index 000000000..1abfd2d42
--- /dev/null
+++ b/mpn/alpha/add_n.asm
@@ -0,0 +1,114 @@
+dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  s2_ptr	r18
+dnl  size	r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	ldq	r3,0(r17)	C r3 = first s1 limb
+	ldq	r4,0(r18)	C r4 = first s2 limb
+
+	subq	r19,1,r19	C size-1, the last limb is finished at Lend
+	and	r19,4-1,r2	C number of limbs in first loop
+	bis	r31,r31,r0	C clear carry
+	beq	r2,$L0		C if multiple of 4 limbs, skip first loop
+
+	subq	r19,r2,r19	C r19 = limbs left for the 4-way loop
+
+$Loop0:	subq	r2,1,r2		C first loop: one limb per iteration
+	ldq	r5,8(r17)	C preload next s1 limb
+	addq	r4,r0,r4	C s2 limb + carry-in
+	ldq	r6,8(r18)	C preload next s2 limb
+	cmpult	r4,r0,r1	C r1 = carry from adding carry-in
+	addq	r3,r4,r4	C add s1 limb
+	cmpult	r4,r3,r0	C r0 = carry from adding s1 limb
+	stq	r4,0(r16)	C store sum limb
+	bis	r0,r1,r0	C combine the two carries
+
+	addq	r17,8,r17	C s1_ptr++
+	addq	r18,8,r18	C s2_ptr++
+	bis	r5,r5,r3	C preloaded s1 limb becomes current
+	bis	r6,r6,r4	C preloaded s2 limb becomes current
+	addq	r16,8,r16	C res_ptr++
+	bne	r2,$Loop0	C repeat until first loop count is 0
+
+$L0:	beq	r19,$Lend	C no groups of 4 left, finish last limb
+
+	ALIGN(8)
+$Loop:	subq	r19,4,r19	C main loop: four limbs per iteration
+
+	ldq	r5,8(r17)	C limb 0: preload next s1 limb
+	addq	r4,r0,r4	C s2 limb + carry-in
+	ldq	r6,8(r18)	C preload next s2 limb
+	cmpult	r4,r0,r1	C r1 = carry from adding carry-in
+	addq	r3,r4,r4	C add s1 limb
+	cmpult	r4,r3,r0	C r0 = carry from adding s1 limb
+	stq	r4,0(r16)	C store sum limb 0
+	bis	r0,r1,r0	C combine the two carries
+
+	ldq	r3,16(r17)	C limb 1: same steps on r5,r6
+	addq	r6,r0,r6
+	ldq	r4,16(r18)
+	cmpult	r6,r0,r1
+	addq	r5,r6,r6
+	cmpult	r6,r5,r0
+	stq	r6,8(r16)	C store sum limb 1
+	bis	r0,r1,r0
+
+	ldq	r5,24(r17)	C limb 2: same steps on r3,r4
+	addq	r4,r0,r4
+	ldq	r6,24(r18)
+	cmpult	r4,r0,r1
+	addq	r3,r4,r4
+	cmpult	r4,r3,r0
+	stq	r4,16(r16)	C store sum limb 2
+	bis	r0,r1,r0
+
+	ldq	r3,32(r17)	C limb 3: same steps on r5,r6
+	addq	r6,r0,r6
+	ldq	r4,32(r18)
+	cmpult	r6,r0,r1
+	addq	r5,r6,r6
+	cmpult	r6,r5,r0
+	stq	r6,24(r16)	C store sum limb 3
+	bis	r0,r1,r0
+
+	addq	r17,32,r17	C s1_ptr += 4 limbs
+	addq	r18,32,r18	C s2_ptr += 4 limbs
+	addq	r16,32,r16	C res_ptr += 4 limbs
+	bne	r19,$Loop	C repeat until no groups of 4 remain
+
+$Lend:	addq	r4,r0,r4	C last limb: s2 limb + carry-in
+	cmpult	r4,r0,r1	C r1 = carry from adding carry-in
+	addq	r3,r4,r4	C add s1 limb
+	cmpult	r4,r3,r0	C r0 = carry from adding s1 limb
+	stq	r4,0(r16)	C store most significant sum limb
+	bis	r0,r1,r0	C r0 = carry out
+	ret	r31,(r26),1	C return with carry out in r0
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/mpn/alpha/add_n.s b/mpn/alpha/add_n.s
deleted file mode 100644
index 426556e39..000000000
--- a/mpn/alpha/add_n.s
+++ /dev/null
@@ -1,120 +0,0 @@
- # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
- # store sum in a third limb vector.
-
- # Copyright (C) 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	$16
- # s1_ptr	$17
- # s2_ptr	$18
- # size		$19
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_add_n
-	.ent	__mpn_add_n
-__mpn_add_n:
-	.frame	$30,0,$26,0
-
-	ldq	$3,0($17)
-	ldq	$4,0($18)
-
-	subq	$19,1,$19
-	and	$19,4-1,$2	# number of limbs in first loop
-	bis	$31,$31,$0
-	beq	$2,.L0		# if multiple of 4 limbs, skip first loop
-
-	subq	$19,$2,$19
-
-.Loop0:	subq	$2,1,$2
-	ldq	$5,8($17)
-	addq	$4,$0,$4
-	ldq	$6,8($18)
-	cmpult	$4,$0,$1
-	addq	$3,$4,$4
-	cmpult	$4,$3,$0
-	stq	$4,0($16)
-	or	$0,$1,$0
-
-	addq	$17,8,$17
-	addq	$18,8,$18
-	bis	$5,$5,$3
-	bis	$6,$6,$4
-	addq	$16,8,$16
-	bne	$2,.Loop0
-
-.L0:	beq	$19,.Lend
-
-	.align	3
-.Loop:	subq	$19,4,$19
-
-	ldq	$5,8($17)
-	addq	$4,$0,$4
-	ldq	$6,8($18)
-	cmpult	$4,$0,$1
-	addq	$3,$4,$4
-	cmpult	$4,$3,$0
-	stq	$4,0($16)
-	or	$0,$1,$0
-
-	ldq	$3,16($17)
-	addq	$6,$0,$6
-	ldq	$4,16($18)
-	cmpult	$6,$0,$1
-	addq	$5,$6,$6
-	cmpult	$6,$5,$0
-	stq	$6,8($16)
-	or	$0,$1,$0
-
-	ldq	$5,24($17)
-	addq	$4,$0,$4
-	ldq	$6,24($18)
-	cmpult	$4,$0,$1
-	addq	$3,$4,$4
-	cmpult	$4,$3,$0
-	stq	$4,16($16)
-	or	$0,$1,$0
-
-	ldq	$3,32($17)
-	addq	$6,$0,$6
-	ldq	$4,32($18)
-	cmpult	$6,$0,$1
-	addq	$5,$6,$6
-	cmpult	$6,$5,$0
-	stq	$6,24($16)
-	or	$0,$1,$0
-
-	addq	$17,32,$17
-	addq	$18,32,$18
-	addq	$16,32,$16
-	bne	$19,.Loop
-
-.Lend:	addq	$4,$0,$4
-	cmpult	$4,$0,$1
-	addq	$3,$4,$4
-	cmpult	$4,$3,$0
-	stq	$4,0($16)
-	or	$0,$1,$0
-	ret	$31,($26),1
-
-	.end	__mpn_add_n
diff --git a/mpn/alpha/addmul_1.asm b/mpn/alpha/addmul_1.asm
new file mode 100644
index 000000000..0a42326b3
--- /dev/null
+++ b/mpn/alpha/addmul_1.asm
@@ -0,0 +1,87 @@
+dnl  Alpha mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl  the result to a second limb vector.
+
+dnl  Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published
+dnl  by the Free Software Foundation; either version 2 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  size	r18
+dnl  s2_limb	r19
+
+dnl  This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl  cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+	ldq	r2,0(r17)	C r2 = s1_limb
+	addq	r17,8,r17	C s1_ptr++
+	subq	r18,1,r18	C size--
+	mulq	r2,r19,r3	C r3 = prod_low
+	ldq	r5,0(r16)	C r5 = *res_ptr
+	umulh	r2,r19,r0	C r0 = prod_high
+	beq	r18,$Lend1	C jump if size was == 1
+	ldq	r2,0(r17)	C r2 = s1_limb
+	addq	r17,8,r17	C s1_ptr++
+	subq	r18,1,r18	C size--
+	addq	r5,r3,r3	C r3 = *res_ptr + prod_low
+	cmpult	r3,r5,r4	C r4 = carry from that add
+	stq	r3,0(r16)	C store first result limb
+	addq	r16,8,r16	C res_ptr++
+	beq	r18,$Lend2	C jump if size was == 2
+
+	ALIGN(8)
+$Loop:	mulq	r2,r19,r3	C r3 = prod_low
+	ldq	r5,0(r16)	C r5 = *res_ptr
+	addq	r4,r0,r0	C cy_limb = cy_limb + 'cy'
+	subq	r18,1,r18	C size--
+	umulh	r2,r19,r4	C r4 = cy_limb
+	ldq	r2,0(r17)	C r2 = s1_limb
+	addq	r17,8,r17	C s1_ptr++
+	addq	r3,r0,r3	C r3 = cy_limb + prod_low
+	cmpult	r3,r0,r0	C r0 = carry from (cy_limb + prod_low)
+	addq	r5,r3,r3	C add *res_ptr
+	cmpult	r3,r5,r5	C r5 = carry from adding *res_ptr
+	stq	r3,0(r16)	C store result limb
+	addq	r16,8,r16	C res_ptr++
+	addq	r5,r0,r0	C combine carries
+	bne	r18,$Loop	C repeat until size reaches 0
+
+$Lend2:	mulq	r2,r19,r3	C r3 = prod_low
+	ldq	r5,0(r16)	C r5 = *res_ptr
+	addq	r4,r0,r0	C cy_limb = cy_limb + 'cy'
+	umulh	r2,r19,r4	C r4 = cy_limb
+	addq	r3,r0,r3	C r3 = cy_limb + prod_low
+	cmpult	r3,r0,r0	C r0 = carry from (cy_limb + prod_low)
+	addq	r5,r3,r3	C add *res_ptr
+	cmpult	r3,r5,r5	C r5 = carry from adding *res_ptr
+	stq	r3,0(r16)	C store last result limb
+	addq	r5,r0,r0	C combine carries
+	addq	r4,r0,r0	C cy_limb = prod_high + cy
+	ret	r31,(r26),1	C return with cy_limb in r0
+$Lend1:	addq	r5,r3,r3	C size 1: r3 = *res_ptr + prod_low
+	cmpult	r3,r5,r5	C r5 = carry from that add
+	stq	r3,0(r16)	C store the only result limb
+	addq	r0,r5,r0	C cy_limb = prod_high + carry
+	ret	r31,(r26),1	C return with cy_limb in r0
+EPILOGUE(mpn_addmul_1)
+ASM_END()
diff --git a/mpn/alpha/addmul_1.s b/mpn/alpha/addmul_1.s
deleted file mode 100644
index 8513c13f5..000000000
--- a/mpn/alpha/addmul_1.s
+++ /dev/null
@@ -1,92 +0,0 @@
- # Alpha __mpn_addmul_1 -- Multiply a limb vector with a limb and add
- # the result to a second limb vector.
-
- # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	r16
- # s1_ptr	r17
- # size		r18
- # s2_limb	r19
-
- # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_addmul_1
-	.ent	__mpn_addmul_1 2
-__mpn_addmul_1:
-	.frame	$30,0,$26
-
-	ldq	$2,0($17)	# $2 = s1_limb
-	addq	$17,8,$17	# s1_ptr++
-	subq	$18,1,$18	# size--
-	mulq	$2,$19,$3	# $3 = prod_low
-	ldq	$5,0($16)	# $5 = *res_ptr
-	umulh	$2,$19,$0	# $0 = prod_high
-	beq	$18,.Lend1	# jump if size was == 1
-	ldq	$2,0($17)	# $2 = s1_limb
-	addq	$17,8,$17	# s1_ptr++
-	subq	$18,1,$18	# size--
-	addq	$5,$3,$3
-	cmpult	$3,$5,$4
-	stq	$3,0($16)
-	addq	$16,8,$16	# res_ptr++
-	beq	$18,.Lend2	# jump if size was == 2
-
-	.align	3
-.Loop:	mulq	$2,$19,$3	# $3 = prod_low
-	ldq	$5,0($16)	# $5 = *res_ptr
-	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
-	subq	$18,1,$18	# size--
-	umulh	$2,$19,$4	# $4 = cy_limb
-	ldq	$2,0($17)	# $2 = s1_limb
-	addq	$17,8,$17	# s1_ptr++
-	addq	$3,$0,$3	# $3 = cy_limb + prod_low
-	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
-	addq	$5,$3,$3
-	cmpult	$3,$5,$5
-	stq	$3,0($16)
-	addq	$16,8,$16	# res_ptr++
-	addq	$5,$0,$0	# combine carries
-	bne	$18,.Loop
-
-.Lend2:	mulq	$2,$19,$3	# $3 = prod_low
-	ldq	$5,0($16)	# $5 = *res_ptr
-	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
-	umulh	$2,$19,$4	# $4 = cy_limb
-	addq	$3,$0,$3	# $3 = cy_limb + prod_low
-	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
-	addq	$5,$3,$3
-	cmpult	$3,$5,$5
-	stq	$3,0($16)
-	addq	$5,$0,$0	# combine carries
-	addq	$4,$0,$0	# cy_limb = prod_high + cy
-	ret	$31,($26),1
-.Lend1:	addq	$5,$3,$3
-	cmpult	$3,$5,$5
-	stq	$3,0($16)
-	addq	$0,$5,$0
-	ret	$31,($26),1
-
-	.end	__mpn_addmul_1
diff --git a/mpn/alpha/cntlz.s b/mpn/alpha/cntlz.s
deleted file mode 100644
index e0f57c121..000000000
--- a/mpn/alpha/cntlz.s
+++ /dev/null
@@ -1,70 +0,0 @@
- # Alpha auxiliary for longlong.h's count_leading_zeros
-
- # Copyright (C) 1997 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
- # DISCUSSION:
-
- # Other methods have been tried, and using a 128-entry table actually trims
- # about 10% of the execution time (on a 21164) when the table is in the L1
- # cache.  But under non-benchmarking conditions, the table will hardly be in
- # the L1 cache.  Tricky bit-fiddling methods with multiplies and magic tables
- # are also possible, but they require many more instructions than the current
- # code.  (But for count_trailing_zeros, such tricks are beneficial.)
- # Finally, converting to floating-point and extracting the exponent is much
- # slower.
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__count_leading_zeros
-	.ent	__count_leading_zeros 0
-__count_leading_zeros:
-	bis	$31,63,$0		# initialize partial result count
-
-	srl	$16,32,$1		# shift down 32 steps -> r1
-	cmovne	$1,$1,$16		# select r1 if non-zero
-	cmovne	$1,31,$0		# if r1 is nonzero choose smaller count
-
-	srl	$16,16,$1		# shift down 16 steps -> r1
-	subq	$0,16,$2		# generate new partial result count
-	cmovne	$1,$1,$16		# choose new r1 if non-zero
-	cmovne	$1,$2,$0		# choose new count if r1 was non-zero
-
-	srl	$16,8,$1
-	subq	$0,8,$2
-	cmovne	$1,$1,$16
-	cmovne	$1,$2,$0
-
-	srl	$16,4,$1
-	subq	$0,4,$2
-	cmovne	$1,$1,$16
-	cmovne	$1,$2,$0
-
-	srl	$16,2,$1
-	subq	$0,2,$2
-	cmovne	$1,$1,$16
-	cmovne	$1,$2,$0
-
-	srl	$16,1,$1		# extract bit 1
-	subq	$0,$1,$0		# subtract it from partial result
-
-	ret	$31,($26),1
-	.end	__count_leading_zeros
diff --git a/mpn/alpha/default.m4 b/mpn/alpha/default.m4
new file mode 100644
index 000000000..9b7e61a28
--- /dev/null
+++ b/mpn/alpha/default.m4
@@ -0,0 +1,56 @@
+divert(-1)
+dnl  Default assembler-syntax macros used by the Alpha mpn .asm files.
+define(`ASM_START',
+	`
+	.set noreorder
+	.set noat')
+dnl  X(digits) gives 0xdigits.  INT64(label,value) emits an aligned .quad.
+define(`X',`0x$1')
+define(`INT64',
+	`
+	.align	3
+$1:	.quad	$2')
+dnl  PROLOGUE(name) opens a global function in .text, with .prologue 0.
+define(`PROLOGUE',
+	`
+	.text
+	.align	3
+	.globl	$1
+	.ent	$1
+$1:
+	.frame r30,0,r26
+	.prologue 0')
+dnl  PROLOGUE_GP(name) is like PROLOGUE, but first loads gp via r27 (.prologue 1).
+define(`PROLOGUE_GP',
+	`
+	.text
+	.align	3
+	.globl	$1
+	.ent	$1
+$1:
+	ldgp	r29,0(r27)
+	.frame	r30,0,r26
+	.prologue 1')
+dnl  EPILOGUE(name) emits the .end matching the .ent from the prologue.
+define(`EPILOGUE',
+	`
+	.end	$1')
+
+dnl  Map the register names r0..r31 to `$0'..`$31' and f0..f31 to `$f0'..`$f31'.
+dnl  This is needed on all systems but Unicos.
+forloop(i,0,31,
+`define(`r'i,``$''i)'
+)
+forloop(i,0,31,
+`define(`f'i,``$f''i)'
+)
+dnl  DATASTART(label) expands to DATA followed by the label.
+define(`DATASTART',
+	`dnl
+	DATA
+$1:')
+define(`DATAEND',`dnl')
+
+define(`ASM_END',`dnl')
+
+divert
diff --git a/mpn/alpha/ev5/add_n.asm b/mpn/alpha/ev5/add_n.asm
new file mode 100644
index 000000000..9b3484aa9
--- /dev/null
+++ b/mpn/alpha/ev5/add_n.asm
@@ -0,0 +1,143 @@
+dnl  Alpha EV5 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  s2_ptr	r18
+dnl  size	r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	bis	r31,r31,r25		C clear cy
+	subq	r19,4,r19		C decr loop cnt
+	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+	ldq	r0,0(r18)		C s2 limb 0
+	ldq	r4,0(r17)		C s1 limb 0
+	ldq	r1,8(r18)		C s2 limb 1
+	ldq	r5,8(r17)		C s1 limb 1
+	addq	r17,32,r17		C update s1_ptr
+	ldq	r2,16(r18)		C s2 limb 2
+	addq	r0,r4,r20		C 1st main add
+	ldq	r3,24(r18)		C s2 limb 3
+	subq	r19,4,r19		C decr loop cnt
+	ldq	r6,-16(r17)		C s1 limb 2 (s1_ptr already updated)
+	cmpult	r20,r0,r25		C compute cy from last add
+	ldq	r7,-8(r17)		C s1 limb 3 (s1_ptr already updated)
+	addq	r1,r5,r28		C 2nd main add
+	addq	r18,32,r18		C update s2_ptr
+	addq	r28,r25,r21		C 2nd carry add
+	cmpult	r28,r5,r8		C compute cy from last add
+	blt	r19,$Lend1		C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+	ALIGN(16)
+$Loop:	cmpult	r21,r28,r25		C compute cy from last add
+	ldq	r0,0(r18)		C s2 limb 0 of next group
+	bis	r8,r25,r25		C combine cy from the two adds
+	ldq	r1,8(r18)		C s2 limb 1 of next group
+	addq	r2,r6,r28		C 3rd main add
+	ldq	r4,0(r17)		C s1 limb 0 of next group
+	addq	r28,r25,r22		C 3rd carry add
+	ldq	r5,8(r17)		C s1 limb 1 of next group
+	cmpult	r28,r6,r8		C compute cy from last add
+	cmpult	r22,r28,r25		C compute cy from last add
+	stq	r20,0(r16)		C store 1st sum limb
+	bis	r8,r25,r25		C combine cy from the two adds
+	stq	r21,8(r16)		C store 2nd sum limb
+	addq	r3,r7,r28		C 4th main add
+	addq	r28,r25,r23		C 4th carry add
+	cmpult	r28,r7,r8		C compute cy from last add
+	cmpult	r23,r28,r25		C compute cy from last add
+		addq	r17,32,r17		C update s1_ptr
+	bis	r8,r25,r25		C combine cy from the two adds
+		addq	r16,32,r16		C update res_ptr
+	addq	r0,r4,r28		C 1st main add
+	ldq	r2,16(r18)		C s2 limb 2 of next group
+	addq	r25,r28,r20		C 1st carry add
+	ldq	r3,24(r18)		C s2 limb 3 of next group
+	cmpult	r28,r4,r8		C compute cy from last add
+	ldq	r6,-16(r17)		C s1 limb 2 (s1_ptr already updated)
+	cmpult	r20,r28,r25		C compute cy from last add
+	ldq	r7,-8(r17)		C s1 limb 3 (s1_ptr already updated)
+	bis	r8,r25,r25		C combine cy from the two adds
+	subq	r19,4,r19		C decr loop cnt
+	stq	r22,-16(r16)		C store 3rd sum limb (res_ptr already updated)
+	addq	r1,r5,r28		C 2nd main add
+	stq	r23,-8(r16)		C store 4th sum limb (res_ptr already updated)
+	addq	r25,r28,r21		C 2nd carry add
+		addq	r18,32,r18		C update s2_ptr
+	cmpult	r28,r5,r8		C compute cy from last add
+	bge	r19,$Loop		C loop while loop cnt is not negative
+C Finish software pipeline for 1st loop
+$Lend1:	cmpult	r21,r28,r25		C compute cy from last add
+	bis	r8,r25,r25		C combine cy from the two adds
+	addq	r2,r6,r28		C 3rd main add
+	addq	r28,r25,r22		C 3rd carry add
+	cmpult	r28,r6,r8		C compute cy from last add
+	cmpult	r22,r28,r25		C compute cy from last add
+	stq	r20,0(r16)		C store 1st sum limb
+	bis	r8,r25,r25		C combine cy from the two adds
+	stq	r21,8(r16)		C store 2nd sum limb
+	addq	r3,r7,r28		C 4th main add
+	addq	r28,r25,r23		C 4th carry add
+	cmpult	r28,r7,r8		C compute cy from last add
+	cmpult	r23,r28,r25		C compute cy from last add
+	bis	r8,r25,r25		C combine cy from the two adds
+	addq	r16,32,r16		C update res_ptr
+	stq	r22,-16(r16)		C store 3rd sum limb
+	stq	r23,-8(r16)		C store 4th sum limb
+$Lend2:	addq	r19,4,r19		C restore loop cnt
+	beq	r19,$Lret		C no limbs remain, return cy
+C Start software pipeline for 2nd loop
+	ldq	r0,0(r18)		C s2 limb
+	ldq	r4,0(r17)		C s1 limb
+	subq	r19,1,r19		C last limb is finished at Lend0
+	beq	r19,$Lend0		C only one limb remained
+C 2nd loop handles remaining 1-3 limbs
+	ALIGN(16)
+$Loop0:	addq	r0,r4,r28		C main add
+	ldq	r0,8(r18)		C next s2 limb
+	cmpult	r28,r4,r8		C compute cy from last add
+	ldq	r4,8(r17)		C next s1 limb
+	addq	r28,r25,r20		C carry add
+	addq	r18,8,r18		C s2_ptr++
+	addq	r17,8,r17		C s1_ptr++
+	stq	r20,0(r16)		C store sum limb
+	cmpult	r20,r28,r25		C compute cy from last add
+	subq	r19,1,r19		C decr loop cnt
+	bis	r8,r25,r25		C combine cy from the two adds
+	addq	r16,8,r16		C res_ptr++
+	bne	r19,$Loop0		C loop until loop cnt is 0
+$Lend0:	addq	r0,r4,r28		C main add
+	addq	r28,r25,r20		C carry add
+	cmpult	r28,r4,r8		C compute cy from last add
+	cmpult	r20,r28,r25		C compute cy from last add
+	stq	r20,0(r16)		C store last sum limb
+	bis	r8,r25,r25		C combine cy from the two adds
+
+$Lret:	bis	r25,r31,r0		C return cy
+	ret	r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/mpn/alpha/ev5/add_n.s b/mpn/alpha/ev5/add_n.s
deleted file mode 100644
index 66bb9b9fb..000000000
--- a/mpn/alpha/ev5/add_n.s
+++ /dev/null
@@ -1,148 +0,0 @@
- # Alpha EV5 __mpn_add_n -- Add two limb vectors of the same length > 0 and
- # store sum in a third limb vector.
-
- # Copyright (C) 1995, 1999 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	$16
- # s1_ptr	$17
- # s2_ptr	$18
- # size		$19
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_add_n
-	.ent	__mpn_add_n
-__mpn_add_n:
-	.frame	$30,0,$26,0
-
-	or	$31,$31,$25		# clear cy
-	subq	$19,4,$19		# decr loop cnt
-	blt	$19,.Lend2		# if less than 4 limbs, goto 2nd loop
- # Start software pipeline for 1st loop
-	ldq	$0,0($18)
-	ldq	$4,0($17)
-	ldq	$1,8($18)
-	ldq	$5,8($17)
-	addq	$17,32,$17		# update s1_ptr
-	ldq	$2,16($18)
-	addq	$0,$4,$20		# 1st main add
-	ldq	$3,24($18)
-	subq	$19,4,$19		# decr loop cnt
-	ldq	$6,-16($17)
-	cmpult	$20,$0,$25		# compute cy from last add
-	ldq	$7,-8($17)
-	addq	$1,$5,$28		# 2nd main add
-	addq	$18,32,$18		# update s2_ptr
-	addq	$28,$25,$21		# 2nd carry add
-	cmpult	$28,$5,$8		# compute cy from last add
-	blt	$19,.Lend1		# if less than 4 limbs remain, jump
- # 1st loop handles groups of 4 limbs in a software pipeline
-	.align	4
-.Loop:	cmpult	$21,$28,$25		# compute cy from last add
-	ldq	$0,0($18)
-	or	$8,$25,$25		# combine cy from the two adds
-	ldq	$1,8($18)
-	addq	$2,$6,$28		# 3rd main add
-	ldq	$4,0($17)
-	addq	$28,$25,$22		# 3rd carry add
-	ldq	$5,8($17)
-	cmpult	$28,$6,$8		# compute cy from last add
-	cmpult	$22,$28,$25		# compute cy from last add
-	stq	$20,0($16)
-	or	$8,$25,$25		# combine cy from the two adds
-	stq	$21,8($16)
-	addq	$3,$7,$28		# 4th main add
-	addq	$28,$25,$23		# 4th carry add
-	cmpult	$28,$7,$8		# compute cy from last add
-	cmpult	$23,$28,$25		# compute cy from last add
-		addq	$17,32,$17		# update s1_ptr
-	or	$8,$25,$25		# combine cy from the two adds
-		addq	$16,32,$16		# update res_ptr
-	addq	$0,$4,$28		# 1st main add
-	ldq	$2,16($18)
-	addq	$25,$28,$20		# 1st carry add
-	ldq	$3,24($18)
-	cmpult	$28,$4,$8		# compute cy from last add
-	ldq	$6,-16($17)
-	cmpult	$20,$28,$25		# compute cy from last add
-	ldq	$7,-8($17)
-	or	$8,$25,$25		# combine cy from the two adds
-	subq	$19,4,$19		# decr loop cnt
-	stq	$22,-16($16)
-	addq	$1,$5,$28		# 2nd main add
-	stq	$23,-8($16)
-	addq	$25,$28,$21		# 2nd carry add
-		addq	$18,32,$18		# update s2_ptr
-	cmpult	$28,$5,$8		# compute cy from last add
-	bge	$19,.Loop
- # Finish software pipeline for 1st loop
-.Lend1:	cmpult	$21,$28,$25		# compute cy from last add
-	or	$8,$25,$25		# combine cy from the two adds
-	addq	$2,$6,$28		# 3rd main add
-	addq	$28,$25,$22		# 3rd carry add
-	cmpult	$28,$6,$8		# compute cy from last add
-	cmpult	$22,$28,$25		# compute cy from last add
-	stq	$20,0($16)
-	or	$8,$25,$25		# combine cy from the two adds
-	stq	$21,8($16)
-	addq	$3,$7,$28		# 4th main add
-	addq	$28,$25,$23		# 4th carry add
-	cmpult	$28,$7,$8		# compute cy from last add
-	cmpult	$23,$28,$25		# compute cy from last add
-	or	$8,$25,$25		# combine cy from the two adds
-	addq	$16,32,$16		# update res_ptr
-	stq	$22,-16($16)
-	stq	$23,-8($16)
-.Lend2:	addq	$19,4,$19		# restore loop cnt
-	beq	$19,.Lret
- # Start software pipeline for 2nd loop
-	ldq	$0,0($18)
-	ldq	$4,0($17)
-	subq	$19,1,$19
-	beq	$19,.Lend0
- # 2nd loop handles remaining 1-3 limbs
-	.align	4
-.Loop0:	addq	$0,$4,$28		# main add
-	ldq	$0,8($18)
-	cmpult	$28,$4,$8		# compute cy from last add
-	ldq	$4,8($17)
-	addq	$28,$25,$20		# carry add
-	addq	$18,8,$18
-	addq	$17,8,$17
-	stq	$20,0($16)
-	cmpult	$20,$28,$25		# compute cy from last add
-	subq	$19,1,$19		# decr loop cnt
-	or	$8,$25,$25		# combine cy from the two adds
-	addq	$16,8,$16
-	bne	$19,.Loop0
-.Lend0:	addq	$0,$4,$28		# main add
-	addq	$28,$25,$20		# carry add
-	cmpult	$28,$4,$8		# compute cy from last add
-	cmpult	$20,$28,$25		# compute cy from last add
-	stq	$20,0($16)
-	or	$8,$25,$25		# combine cy from the two adds
-
-.Lret:	or	$25,$31,$0		# return cy
-	ret	$31,($26),1
-	.end	__mpn_add_n
diff --git a/mpn/alpha/ev5/lshift.asm b/mpn/alpha/ev5/lshift.asm
new file mode 100644
index 000000000..23b9e8a10
--- /dev/null
+++ b/mpn/alpha/ev5/lshift.asm
@@ -0,0 +1,169 @@
+dnl  Alpha EV5 mpn_lshift -- Shift a number left.
+
+dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  size	r18
+dnl  cnt	r19
+
+dnl  This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	s8addq	r18,r17,r17	C make r17 point at end of s1
+	ldq	r4,-8(r17)	C load first limb
+	subq	r31,r19,r20	C r20 = -cnt, the count for the right shifts
+	s8addq	r18,r16,r16	C make r16 point at end of RES
+	subq	r18,1,r18	C size-1
+	and	r18,4-1,r28	C number of limbs in first loop
+	srl	r4,r20,r0	C compute function result
+
+	beq	r28,$L0		C if multiple of 4 limbs, skip first loop
+	subq	r18,r28,r18	C r18 = limbs left for the 4-way code
+
+	ALIGN(8)
+$Loop0:	ldq	r3,-16(r17)	C load next lower limb
+	subq	r16,8,r16	C step res_ptr down one limb
+	sll	r4,r19,r5	C current limb shifted left by cnt
+	subq	r17,8,r17	C step s1_ptr down one limb
+	subq	r28,1,r28	C decr first loop count
+	srl	r3,r20,r6	C bits coming in from the next lower limb
+	bis	r3,r3,r4	C next lower limb becomes current
+	bis	r5,r6,r8	C combine the two parts
+	stq	r8,0(r16)	C store result limb
+	bne	r28,$Loop0
+
+$L0:	sll	r4,r19,r24	C r24 = current limb shifted left by cnt
+	beq	r18,$Lend	C no limbs left, just store r24
+C warm up phase 1
+	ldq	r1,-16(r17)
+	subq	r18,4,r18	C decr loop cnt
+	ldq	r2,-24(r17)
+	ldq	r3,-32(r17)
+	ldq	r4,-40(r17)
+	beq	r18,$Lend1	C only one group of 4, skip main loop
+C warm up phase 2
+	srl	r1,r20,r7
+	sll	r1,r19,r21
+	srl	r2,r20,r8
+	ldq	r1,-48(r17)
+	sll	r2,r19,r22
+	ldq	r2,-56(r17)
+	srl	r3,r20,r5
+	bis	r7,r24,r7
+	sll	r3,r19,r23
+	bis	r8,r21,r8
+	srl	r4,r20,r6
+	ldq	r3,-64(r17)
+	sll	r4,r19,r24
+	ldq	r4,-72(r17)
+	subq	r18,4,r18	C decr loop cnt
+	beq	r18,$Lend2	C two groups of 4, skip main loop
+	ALIGN(16)
+C main loop
+$Loop:	stq	r7,-8(r16)
+	bis	r5,r22,r5
+	stq	r8,-16(r16)
+	bis	r6,r23,r6
+
+	srl	r1,r20,r7
+	subq	r18,4,r18	C decr loop cnt
+	sll	r1,r19,r21
+	unop	C ldq	r31,-96(r17)
+
+	srl	r2,r20,r8
+	ldq	r1,-80(r17)
+	sll	r2,r19,r22
+	ldq	r2,-88(r17)
+
+	stq	r5,-24(r16)
+	bis	r7,r24,r7
+	stq	r6,-32(r16)
+	bis	r8,r21,r8
+
+	srl	r3,r20,r5
+	unop	C ldq	r31,-96(r17)
+	sll	r3,r19,r23
+	subq	r16,32,r16	C step res_ptr down 4 limbs
+
+	srl	r4,r20,r6
+	ldq	r3,-96(r17)
+	sll	r4,r19,r24
+	ldq	r4,-104(r17)
+
+	subq	r17,32,r17	C step s1_ptr down 4 limbs
+	bne	r18,$Loop
+C cool down phase 2/1
+$Lend2:	stq	r7,-8(r16)
+	bis	r5,r22,r5
+	stq	r8,-16(r16)
+	bis	r6,r23,r6
+	srl	r1,r20,r7
+	sll	r1,r19,r21
+	srl	r2,r20,r8
+	sll	r2,r19,r22
+	stq	r5,-24(r16)
+	bis	r7,r24,r7
+	stq	r6,-32(r16)
+	bis	r8,r21,r8
+	srl	r3,r20,r5
+	sll	r3,r19,r23
+	srl	r4,r20,r6
+	sll	r4,r19,r24
+C cool down phase 2/2
+	stq	r7,-40(r16)
+	bis	r5,r22,r5
+	stq	r8,-48(r16)
+	bis	r6,r23,r6
+	stq	r5,-56(r16)
+	stq	r6,-64(r16)
+C cool down phase 2/3
+	stq	r24,-72(r16)
+	ret	r31,(r26),1	C return function result in r0
+
+C cool down phase 1/1
+$Lend1:	srl	r1,r20,r7
+	sll	r1,r19,r21
+	srl	r2,r20,r8
+	sll	r2,r19,r22
+	srl	r3,r20,r5
+	bis	r7,r24,r7
+	sll	r3,r19,r23
+	bis	r8,r21,r8
+	srl	r4,r20,r6
+	sll	r4,r19,r24
+C cool down phase 1/2
+	stq	r7,-8(r16)
+	bis	r5,r22,r5
+	stq	r8,-16(r16)
+	bis	r6,r23,r6
+	stq	r5,-24(r16)
+	stq	r6,-32(r16)
+	stq	r24,-40(r16)
+	ret	r31,(r26),1	C return function result in r0
+
+$Lend:	stq	r24,-8(r16)	C store the last result limb
+	ret	r31,(r26),1	C return function result in r0
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/mpn/alpha/ev5/lshift.s b/mpn/alpha/ev5/lshift.s
deleted file mode 100644
index ced55b720..000000000
--- a/mpn/alpha/ev5/lshift.s
+++ /dev/null
@@ -1,174 +0,0 @@
- # Alpha EV5 __mpn_lshift --
-
- # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	r16
- # s1_ptr	r17
- # size		r18
- # cnt		r19
-
- # This code runs at 3.25 cycles/limb on the EV5.
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_lshift
-	.ent	__mpn_lshift
-__mpn_lshift:
-	.frame	$30,0,$26,0
-
-	s8addq	$18,$17,$17	# make r17 point at end of s1
-	ldq	$4,-8($17)	# load first limb
-	subq	$31,$19,$20
-	s8addq	$18,$16,$16	# make r16 point at end of RES
-	subq	$18,1,$18
-	and	$18,4-1,$28	# number of limbs in first loop
-	srl	$4,$20,$0	# compute function result
-
-	beq	$28,.L0
-	subq	$18,$28,$18
-
-	.align	3
-.Loop0:	ldq	$3,-16($17)
-	subq	$16,8,$16
-	sll	$4,$19,$5
-	subq	$17,8,$17
-	subq	$28,1,$28
-	srl	$3,$20,$6
-	or	$3,$3,$4
-	or	$5,$6,$8
-	stq	$8,0($16)
-	bne	$28,.Loop0
-
-.L0:	sll	$4,$19,$24
-	beq	$18,.Lend
- # warm up phase 1
-	ldq	$1,-16($17)
-	subq	$18,4,$18
-	ldq	$2,-24($17)
-	ldq	$3,-32($17)
-	ldq	$4,-40($17)
-	beq	$18,.Lend1
- # warm up phase 2
-	srl	$1,$20,$7
-	sll	$1,$19,$21
-	srl	$2,$20,$8
-	ldq	$1,-48($17)
-	sll	$2,$19,$22
-	ldq	$2,-56($17)
-	srl	$3,$20,$5
-	or	$7,$24,$7
-	sll	$3,$19,$23
-	or	$8,$21,$8
-	srl	$4,$20,$6
-	ldq	$3,-64($17)
-	sll	$4,$19,$24
-	ldq	$4,-72($17)
-	subq	$18,4,$18
-	beq	$18,.Lend2
-	.align  4
- # main loop
-.Loop:	stq	$7,-8($16)
-	or	$5,$22,$5
-	stq	$8,-16($16)
-	or	$6,$23,$6
-
-	srl	$1,$20,$7
-	subq	$18,4,$18
-	sll	$1,$19,$21
-	unop	# ldq	$31,-96($17)
-
-	srl	$2,$20,$8
-	ldq	$1,-80($17)
-	sll	$2,$19,$22
-	ldq	$2,-88($17)
-
-	stq	$5,-24($16)
-	or	$7,$24,$7
-	stq	$6,-32($16)
-	or	$8,$21,$8
-
-	srl	$3,$20,$5
-	unop	# ldq	$31,-96($17)
-	sll	$3,$19,$23
-	subq	$16,32,$16
-
-	srl	$4,$20,$6
-	ldq	$3,-96($17)
-	sll	$4,$19,$24
-	ldq	$4,-104($17)
-
-	subq	$17,32,$17
-	bne	$18,.Loop
- # cool down phase 2/1
-.Lend2:	stq	$7,-8($16)
-	or	$5,$22,$5
-	stq	$8,-16($16)
-	or	$6,$23,$6
-	srl	$1,$20,$7
-	sll	$1,$19,$21
-	srl	$2,$20,$8
-	sll	$2,$19,$22
-	stq	$5,-24($16)
-	or	$7,$24,$7
-	stq	$6,-32($16)
-	or	$8,$21,$8
-	srl	$3,$20,$5
-	sll	$3,$19,$23
-	srl	$4,$20,$6
-	sll	$4,$19,$24
- # cool down phase 2/2
-	stq	$7,-40($16)
-	or	$5,$22,$5
-	stq	$8,-48($16)
-	or	$6,$23,$6
-	stq	$5,-56($16)
-	stq	$6,-64($16)
- # cool down phase 2/3
-	stq	$24,-72($16)
-	ret	$31,($26),1
-
- # cool down phase 1/1
-.Lend1:	srl	$1,$20,$7
-	sll	$1,$19,$21
-	srl	$2,$20,$8
-	sll	$2,$19,$22
-	srl	$3,$20,$5
-	or	$7,$24,$7
-	sll	$3,$19,$23
-	or	$8,$21,$8
-	srl	$4,$20,$6
-	sll	$4,$19,$24
- # cool down phase 1/2
-	stq	$7,-8($16)
-	or	$5,$22,$5
-	stq	$8,-16($16)
-	or	$6,$23,$6
-	stq	$5,-24($16)
-	stq	$6,-32($16)
-	stq	$24,-40($16)
-	ret	$31,($26),1
-
-.Lend:	stq	$24,-8($16)
-	ret	$31,($26),1
-	.end	__mpn_lshift
diff --git a/mpn/alpha/ev5/rshift.asm b/mpn/alpha/ev5/rshift.asm
new file mode 100644
index 000000000..c3325579f
--- /dev/null
+++ b/mpn/alpha/ev5/rshift.asm
@@ -0,0 +1,167 @@
+dnl  Alpha EV5 __mpn_rshift -- Shift a number right.
+
+dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  size	r18
+dnl  cnt	r19
+
+dnl  This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	ldq	r4,0(r17)	C load first limb
+	subq	r31,r19,r20	C r20 = -cnt, the count used by the sll shifts below
+	subq	r18,1,r18	C r18 = size - 1
+	and	r18,4-1,r28	C number of limbs in first loop
+	sll	r4,r20,r0	C compute function result
+
+	beq	r28,$L0	C first loop not needed when (size - 1) % 4 == 0
+	subq	r18,r28,r18	C r18 = limbs left for the 4-way unrolled code
+
+	ALIGN(8)
+$Loop0:	ldq	r3,8(r17)	C r3 = next source limb
+	addq	r16,8,r16	C res_ptr++ (the store below uses offset -8)
+	srl	r4,r19,r5	C low part: current limb >> cnt
+	addq	r17,8,r17	C s1_ptr++
+	subq	r28,1,r28	C decr first-loop cnt
+	sll	r3,r20,r6	C high part: bits shifted in from the next limb
+	bis	r3,r3,r4	C next limb becomes the current limb
+	bis	r5,r6,r8	C combine the two parts
+	stq	r8,-8(r16)
+	bne	r28,$Loop0
+
+$L0:	srl	r4,r19,r24	C r24 = low part of the next result limb
+	beq	r18,$Lend	C no limbs left for the unrolled code
+C warm up phase 1
+	ldq	r1,8(r17)
+	subq	r18,4,r18	C decr loop cnt (4 limbs per group)
+	ldq	r2,16(r17)
+	ldq	r3,24(r17)
+	ldq	r4,32(r17)
+	beq	r18,$Lend1	C exactly one group of 4: no pipelining needed
+C warm up phase 2
+	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	ldq	r1,40(r17)
+	srl	r2,r19,r22
+	ldq	r2,48(r17)
+	sll	r3,r20,r5
+	bis	r7,r24,r7
+	srl	r3,r19,r23
+	bis	r8,r21,r8
+	sll	r4,r20,r6
+	ldq	r3,56(r17)
+	srl	r4,r19,r24
+	ldq	r4,64(r17)
+	subq	r18,4,r18	C decr loop cnt
+	beq	r18,$Lend2	C exactly two groups of 4: skip the main loop
+	ALIGN(16)
+C main loop
+$Loop:	stq	r7,0(r16)
+	bis	r5,r22,r5
+	stq	r8,8(r16)
+	bis	r6,r23,r6
+
+	sll	r1,r20,r7
+	subq	r18,4,r18	C decr loop cnt
+	srl	r1,r19,r21
+	unop	C unused prefetch slot, was: ldq r31,-96(r17); NOTE(review): negative offset looks copied from lshift
+
+	sll	r2,r20,r8
+	ldq	r1,72(r17)
+	srl	r2,r19,r22
+	ldq	r2,80(r17)
+
+	stq	r5,16(r16)
+	bis	r7,r24,r7
+	stq	r6,24(r16)
+	bis	r8,r21,r8
+
+	sll	r3,r20,r5
+	unop	C unused prefetch slot, was: ldq r31,-96(r17); NOTE(review): negative offset looks copied from lshift
+	srl	r3,r19,r23
+	addq	r16,32,r16	C res_ptr += 4 limbs
+
+	sll	r4,r20,r6
+	ldq	r3,88(r17)
+	srl	r4,r19,r24
+	ldq	r4,96(r17)
+
+	addq	r17,32,r17	C s1_ptr += 4 limbs
+	bne	r18,$Loop
+C cool down phase 2/1
+$Lend2:	stq	r7,0(r16)
+	bis	r5,r22,r5
+	stq	r8,8(r16)
+	bis	r6,r23,r6
+	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	srl	r2,r19,r22
+	stq	r5,16(r16)
+	bis	r7,r24,r7
+	stq	r6,24(r16)
+	bis	r8,r21,r8
+	sll	r3,r20,r5
+	srl	r3,r19,r23
+	sll	r4,r20,r6
+	srl	r4,r19,r24
+C cool down phase 2/2
+	stq	r7,32(r16)
+	bis	r5,r22,r5
+	stq	r8,40(r16)
+	bis	r6,r23,r6
+	stq	r5,48(r16)
+	stq	r6,56(r16)
+C cool down phase 2/3
+	stq	r24,64(r16)	C top result limb: last source limb >> cnt
+	ret	r31,(r26),1
+
+C cool down phase 1/1
+$Lend1:	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	srl	r2,r19,r22
+	sll	r3,r20,r5
+	bis	r7,r24,r7
+	srl	r3,r19,r23
+	bis	r8,r21,r8
+	sll	r4,r20,r6
+	srl	r4,r19,r24
+C cool down phase 1/2
+	stq	r7,0(r16)
+	bis	r5,r22,r5
+	stq	r8,8(r16)
+	bis	r6,r23,r6
+	stq	r5,16(r16)
+	stq	r6,24(r16)
+	stq	r24,32(r16)	C top result limb: last source limb >> cnt
+	ret	r31,(r26),1
+
+$Lend:	stq	r24,0(r16)	C top result limb: last source limb >> cnt
+	ret	r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/mpn/alpha/ev5/rshift.s b/mpn/alpha/ev5/rshift.s
deleted file mode 100644
index 6e24fef96..000000000
--- a/mpn/alpha/ev5/rshift.s
+++ /dev/null
@@ -1,172 +0,0 @@
- # Alpha EV5 __mpn_rshift --
-
- # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	r16
- # s1_ptr	r17
- # size		r18
- # cnt		r19
-
- # This code runs at 3.25 cycles/limb on the EV5.
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_rshift
-	.ent	__mpn_rshift
-__mpn_rshift:
-	.frame	$30,0,$26,0
-
-	ldq	$4,0($17)	# load first limb
-	subq	$31,$19,$20
-	subq	$18,1,$18
-	and	$18,4-1,$28	# number of limbs in first loop
-	sll	$4,$20,$0	# compute function result
-
-	beq	$28,.L0
-	subq	$18,$28,$18
-
-	.align	3
-.Loop0:	ldq	$3,8($17)
-	addq	$16,8,$16
-	srl	$4,$19,$5
-	addq	$17,8,$17
-	subq	$28,1,$28
-	sll	$3,$20,$6
-	or	$3,$3,$4
-	or	$5,$6,$8
-	stq	$8,-8($16)
-	bne	$28,.Loop0
-
-.L0:	srl	$4,$19,$24
-	beq	$18,.Lend
- # warm up phase 1
-	ldq	$1,8($17)
-	subq	$18,4,$18
-	ldq	$2,16($17)
-	ldq	$3,24($17)
-	ldq	$4,32($17)
-	beq	$18,.Lend1
- # warm up phase 2
-	sll	$1,$20,$7
-	srl	$1,$19,$21
-	sll	$2,$20,$8
-	ldq	$1,40($17)
-	srl	$2,$19,$22
-	ldq	$2,48($17)
-	sll	$3,$20,$5
-	or	$7,$24,$7
-	srl	$3,$19,$23
-	or	$8,$21,$8
-	sll	$4,$20,$6
-	ldq	$3,56($17)
-	srl	$4,$19,$24
-	ldq	$4,64($17)
-	subq	$18,4,$18
-	beq	$18,.Lend2
-	.align  4
- # main loop
-.Loop:	stq	$7,0($16)
-	or	$5,$22,$5
-	stq	$8,8($16)
-	or	$6,$23,$6
-
-	sll	$1,$20,$7
-	subq	$18,4,$18
-	srl	$1,$19,$21
-	unop	# ldq	$31,-96($17)
-
-	sll	$2,$20,$8
-	ldq	$1,72($17)
-	srl	$2,$19,$22
-	ldq	$2,80($17)
-
-	stq	$5,16($16)
-	or	$7,$24,$7
-	stq	$6,24($16)
-	or	$8,$21,$8
-
-	sll	$3,$20,$5
-	unop	# ldq	$31,-96($17)
-	srl	$3,$19,$23
-	addq	$16,32,$16
-
-	sll	$4,$20,$6
-	ldq	$3,88($17)
-	srl	$4,$19,$24
-	ldq	$4,96($17)
-
-	addq	$17,32,$17
-	bne	$18,.Loop
- # cool down phase 2/1
-.Lend2:	stq	$7,0($16)
-	or	$5,$22,$5
-	stq	$8,8($16)
-	or	$6,$23,$6
-	sll	$1,$20,$7
-	srl	$1,$19,$21
-	sll	$2,$20,$8
-	srl	$2,$19,$22
-	stq	$5,16($16)
-	or	$7,$24,$7
-	stq	$6,24($16)
-	or	$8,$21,$8
-	sll	$3,$20,$5
-	srl	$3,$19,$23
-	sll	$4,$20,$6
-	srl	$4,$19,$24
- # cool down phase 2/2
-	stq	$7,32($16)
-	or	$5,$22,$5
-	stq	$8,40($16)
-	or	$6,$23,$6
-	stq	$5,48($16)
-	stq	$6,56($16)
- # cool down phase 2/3
-	stq	$24,64($16)
-	ret	$31,($26),1
-
- # cool down phase 1/1
-.Lend1:	sll	$1,$20,$7
-	srl	$1,$19,$21
-	sll	$2,$20,$8
-	srl	$2,$19,$22
-	sll	$3,$20,$5
-	or	$7,$24,$7
-	srl	$3,$19,$23
-	or	$8,$21,$8
-	sll	$4,$20,$6
-	srl	$4,$19,$24
- # cool down phase 1/2
-	stq	$7,0($16)
-	or	$5,$22,$5
-	stq	$8,8($16)
-	or	$6,$23,$6
-	stq	$5,16($16)
-	stq	$6,24($16)
-	stq	$24,32($16)
-	ret	$31,($26),1
-
-.Lend:	stq	$24,0($16)
-	ret	$31,($26),1
-	.end	__mpn_rshift
diff --git a/mpn/alpha/ev5/sub_n.asm b/mpn/alpha/ev5/sub_n.asm
new file mode 100644
index 000000000..213c2c885
--- /dev/null
+++ b/mpn/alpha/ev5/sub_n.asm
@@ -0,0 +1,143 @@
+dnl  Alpha EV5 __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl  and store difference in a third limb vector.
+
+dnl  Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  s2_ptr	r18
+dnl  size	r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	bis	r31,r31,r25		C clear cy
+	subq	r19,4,r19		C decr loop cnt
+	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+	ldq	r0,0(r18)
+	ldq	r4,0(r17)
+	ldq	r1,8(r18)
+	ldq	r5,8(r17)
+	addq	r17,32,r17		C update s1_ptr
+	ldq	r2,16(r18)
+	subq	r4,r0,r20		C 1st main subtract
+	ldq	r3,24(r18)
+	subq	r19,4,r19		C decr loop cnt
+	ldq	r6,-16(r17)
+	cmpult	r4,r0,r25		C compute cy from last subtract
+	ldq	r7,-8(r17)
+	subq	r5,r1,r28		C 2nd main subtract
+	addq	r18,32,r18		C update s2_ptr
+	subq	r28,r25,r21		C 2nd carry subtract
+	cmpult	r5,r1,r8		C compute cy from last subtract
+	blt	r19,$Lend1		C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+	ALIGN(16)
+$Loop:	cmpult	r28,r25,r25		C compute cy from last subtract
+	ldq	r0,0(r18)
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	ldq	r1,8(r18)
+	subq	r6,r2,r28		C 3rd main subtract
+	ldq	r4,0(r17)
+	subq	r28,r25,r22		C 3rd carry subtract
+	ldq	r5,8(r17)
+	cmpult	r6,r2,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	stq	r20,0(r16)
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	stq	r21,8(r16)
+	subq	r7,r3,r28		C 4th main subtract
+	subq	r28,r25,r23		C 4th carry subtract
+	cmpult	r7,r3,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+		addq	r17,32,r17		C update s1_ptr
+	bis	r8,r25,r25		C combine cy from the two subtracts
+		addq	r16,32,r16		C update res_ptr
+	subq	r4,r0,r28		C 1st main subtract
+	ldq	r2,16(r18)
+	subq	r28,r25,r20		C 1st carry subtract
+	ldq	r3,24(r18)
+	cmpult	r4,r0,r8		C compute cy from last subtract
+	ldq	r6,-16(r17)
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	ldq	r7,-8(r17)
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	subq	r19,4,r19		C decr loop cnt
+	stq	r22,-16(r16)
+	subq	r5,r1,r28		C 2nd main subtract
+	stq	r23,-8(r16)
+	subq	r28,r25,r21		C 2nd carry subtract
+		addq	r18,32,r18		C update s2_ptr
+	cmpult	r5,r1,r8		C compute cy from last subtract
+	bge	r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1:	cmpult	r28,r25,r25		C compute cy from last subtract
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	subq	r6,r2,r28		C 3rd main subtract
+	subq	r28,r25,r22		C 3rd carry subtract
+	cmpult	r6,r2,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	stq	r20,0(r16)
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	stq	r21,8(r16)
+	subq	r7,r3,r28		C 4th main subtract
+	subq	r28,r25,r23		C 4th carry subtract
+	cmpult	r7,r3,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	addq	r16,32,r16		C update res_ptr
+	stq	r22,-16(r16)
+	stq	r23,-8(r16)
+$Lend2:	addq	r19,4,r19		C restore loop cnt
+	beq	r19,$Lret		C no limbs left, just return cy
+C Start software pipeline for 2nd loop
+	ldq	r0,0(r18)
+	ldq	r4,0(r17)
+	subq	r19,1,r19		C decr loop cnt
+	beq	r19,$Lend0		C a single limb left, skip 2nd loop
+C 2nd loop handles remaining 1-3 limbs
+	ALIGN(16)
+$Loop0:	subq	r4,r0,r28		C main subtract
+	cmpult	r4,r0,r8		C compute cy from last subtract
+	ldq	r0,8(r18)
+	ldq	r4,8(r17)
+	subq	r28,r25,r20		C carry subtract
+	addq	r18,8,r18		C update s2_ptr
+	addq	r17,8,r17		C update s1_ptr
+	stq	r20,0(r16)
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	subq	r19,1,r19		C decr loop cnt
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	addq	r16,8,r16		C update res_ptr
+	bne	r19,$Loop0
+$Lend0:	subq	r4,r0,r28		C main subtract
+	subq	r28,r25,r20		C carry subtract
+	cmpult	r4,r0,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	stq	r20,0(r16)
+	bis	r8,r25,r25		C combine cy from the two subtracts
+
+$Lret:	bis	r25,r31,r0		C return cy
+	ret	r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/mpn/alpha/ev5/sub_n.s b/mpn/alpha/ev5/sub_n.s
deleted file mode 100644
index 36994b956..000000000
--- a/mpn/alpha/ev5/sub_n.s
+++ /dev/null
@@ -1,148 +0,0 @@
- # Alpha EV5 __mpn_sub_n -- Subtract two limb vectors of the same length > 0
- # and store difference in a third limb vector.
-
- # Copyright (C) 1995, 1999 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	$16
- # s1_ptr	$17
- # s2_ptr	$18
- # size		$19
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_sub_n
-	.ent	__mpn_sub_n
-__mpn_sub_n:
-	.frame	$30,0,$26,0
-
-	or	$31,$31,$25		# clear cy
-	subq	$19,4,$19		# decr loop cnt
-	blt	$19,.Lend2		# if less than 4 limbs, goto 2nd loop
- # Start software pipeline for 1st loop
-	ldq	$0,0($18)
-	ldq	$4,0($17)
-	ldq	$1,8($18)
-	ldq	$5,8($17)
-	addq	$17,32,$17		# update s1_ptr
-	ldq	$2,16($18)
-	subq	$4,$0,$20		# 1st main subtract
-	ldq	$3,24($18)
-	subq	$19,4,$19		# decr loop cnt
-	ldq	$6,-16($17)
-	cmpult	$4,$0,$25		# compute cy from last subtract
-	ldq	$7,-8($17)
-	subq	$5,$1,$28		# 2nd main subtract
-	addq	$18,32,$18		# update s2_ptr
-	subq	$28,$25,$21		# 2nd carry subtract
-	cmpult	$5,$1,$8		# compute cy from last subtract
-	blt	$19,.Lend1		# if less than 4 limbs remain, jump
- # 1st loop handles groups of 4 limbs in a software pipeline
-	.align	4
-.Loop:	cmpult	$28,$25,$25		# compute cy from last subtract
-	ldq	$0,0($18)
-	or	$8,$25,$25		# combine cy from the two subtracts
-	ldq	$1,8($18)
-	subq	$6,$2,$28		# 3rd main subtract
-	ldq	$4,0($17)
-	subq	$28,$25,$22		# 3rd carry subtract
-	ldq	$5,8($17)
-	cmpult	$6,$2,$8		# compute cy from last subtract
-	cmpult	$28,$25,$25		# compute cy from last subtract
-	stq	$20,0($16)
-	or	$8,$25,$25		# combine cy from the two subtracts
-	stq	$21,8($16)
-	subq	$7,$3,$28		# 4th main subtract
-	subq	$28,$25,$23		# 4th carry subtract
-	cmpult	$7,$3,$8		# compute cy from last subtract
-	cmpult	$28,$25,$25		# compute cy from last subtract
-		addq	$17,32,$17		# update s1_ptr
-	or	$8,$25,$25		# combine cy from the two subtracts
-		addq	$16,32,$16		# update res_ptr
-	subq	$4,$0,$28		# 1st main subtract
-	ldq	$2,16($18)
-	subq	$28,$25,$20		# 1st carry subtract
-	ldq	$3,24($18)
-	cmpult	$4,$0,$8		# compute cy from last subtract
-	ldq	$6,-16($17)
-	cmpult	$28,$25,$25		# compute cy from last subtract
-	ldq	$7,-8($17)
-	or	$8,$25,$25		# combine cy from the two subtracts
-	subq	$19,4,$19		# decr loop cnt
-	stq	$22,-16($16)
-	subq	$5,$1,$28		# 2nd main subtract
-	stq	$23,-8($16)
-	subq	$28,$25,$21		# 2nd carry subtract
-		addq	$18,32,$18		# update s2_ptr
-	cmpult	$5,$1,$8		# compute cy from last subtract
-	bge	$19,.Loop
- # Finish software pipeline for 1st loop
-.Lend1:	cmpult	$28,$25,$25		# compute cy from last subtract
-	or	$8,$25,$25		# combine cy from the two subtracts
-	subq	$6,$2,$28		# cy add
-	subq	$28,$25,$22		# 3rd main subtract
-	cmpult	$6,$2,$8		# compute cy from last subtract
-	cmpult	$28,$25,$25		# compute cy from last subtract
-	stq	$20,0($16)
-	or	$8,$25,$25		# combine cy from the two subtracts
-	stq	$21,8($16)
-	subq	$7,$3,$28		# cy add
-	subq	$28,$25,$23		# 4th main subtract
-	cmpult	$7,$3,$8		# compute cy from last subtract
-	cmpult	$28,$25,$25		# compute cy from last subtract
-	or	$8,$25,$25		# combine cy from the two subtracts
-	addq	$16,32,$16		# update res_ptr
-	stq	$22,-16($16)
-	stq	$23,-8($16)
-.Lend2:	addq	$19,4,$19		# restore loop cnt
-	beq	$19,.Lret
- # Start software pipeline for 2nd loop
-	ldq	$0,0($18)
-	ldq	$4,0($17)
-	subq	$19,1,$19
-	beq	$19,.Lend0
- # 2nd loop handles remaining 1-3 limbs
-	.align	4
-.Loop0:	subq	$4,$0,$28		# main subtract
-	cmpult	$4,$0,$8		# compute cy from last subtract
-	ldq	$0,8($18)
-	ldq	$4,8($17)
-	subq	$28,$25,$20		# carry subtract
-	addq	$18,8,$18
-	addq	$17,8,$17
-	stq	$20,0($16)
-	cmpult	$28,$25,$25		# compute cy from last subtract
-	subq	$19,1,$19		# decr loop cnt
-	or	$8,$25,$25		# combine cy from the two subtracts
-	addq	$16,8,$16
-	bne	$19,.Loop0
-.Lend0:	subq	$4,$0,$28		# main subtract
-	subq	$28,$25,$20		# carry subtract
-	cmpult	$4,$0,$8		# compute cy from last subtract
-	cmpult	$28,$25,$25		# compute cy from last subtract
-	stq	$20,0($16)
-	or	$8,$25,$25		# combine cy from the two subtracts
-
-.Lret:	or	$25,$31,$0		# return cy
-	ret	$31,($26),1
-	.end	__mpn_sub_n
diff --git a/mpn/alpha/invert-limb.s b/mpn/alpha/invert_limb.asm
index 9706f4b76..9e5cb22a1 100644
--- a/mpn/alpha/invert-limb.s
+++ b/mpn/alpha/invert_limb.asm
@@ -1,101 +1,90 @@
- # Alpha mpn_invert_normalized_limb -- Invert a normalized limb.
+dnl  Alpha mpn_invert_normalized_limb -- Invert a normalized limb.
 
- # Copyright (C) 1996 Free Software Foundation, Inc.
+dnl  Copyright (C) 1996, 2000 Free Software Foundation, Inc.
 
- # This file is part of the GNU MP Library.
+dnl  This file is part of the GNU MP Library.
 
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
 
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
 
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
 
- #
- # This is based on sophie:/gmp-stuff/dbg-inv-limb.c.
- # The ideas are due to Peter L. Montgomery
- #
- # The table below uses 4096 bytes.  The file mentioned above has an
- # alternative function that doesn't require the table, but it runs 50%
- # slower than this.
+dnl 
+dnl  This is based on sophie:/gmp-stuff/dbg-inv-limb.c.
+dnl  The ideas are due to Peter L. Montgomery
+dnl 
+dnl  The table below uses 4096 bytes.  The file mentioned above has an
+dnl  alternative function that doesn't require the table, but it runs 50%
+dnl  slower than this.
 
-	.set	noreorder
-	.set	volatile
-	.set	noat
-.text
-	.align	3
-$C36:
-	.t_floating	9223372036854775808.0
-.text
-	.align	3
-	.globl	__mpn_invert_normalized_limb
-	.ent	__mpn_invert_normalized_limb
-__mpn_invert_normalized_limb:
-	ldgp	$29,0($27)
-__mpn_invert_normalized_limb..ng:
-	lda	$30,-16($30)
-	.frame	$30,16,$26,0
-	.prologue	1
-	addq	$16,$16,$1
-	bne	$1,$73
-	lda	$0,-1
-	br	$31,.Lend
+include(`../config.m4')
+
+ASM_START()
+
+INT64($C36,X(43e0000000000000))		C 2^63
+
+PROLOGUE_GP(mpn_invert_normalized_limb)
+	lda	r30,-16(r30)	C reserve 16 bytes of stack, used for int <-> fp transfers
+	addq	r16,r16,r1	C r1 = 2*d; zero only for d = 2^63, assuming d is normalized
+	bne	r1,$73
+	lda	r0,-1	C special case: return all ones
+	br	r31,$Lend
 $73:
-	srl	$16,1,$1
-	stq	$1,0($30)
-	ldt	$f11,0($30)
-	cvtqt	$f11,$f1
-	lda	$1,$C36
-	ldt	$f10,0($1)
-	divt	$f10,$f1,$f10
-	lda	$2,invtab-4096
-	srl	$16,52,$1
-	addq	$1,$1,$1
-	addq	$1,$2,$1
-	bic	$1,6,$2
-	ldq	$2,0($2)
-	bic	$1,1,$1
-	extwl	$2,$1,$2
-	sll	$2,48,$0
-	umulh	$16,$0,$1
-	addq	$16,$1,$3
-	stq	$3,0($30)
-	ldt	$f11,0($30)
-	cvtqt	$f11,$f1
-	mult	$f1,$f10,$f1
-	cvttqc	$f1,$f1
-	stt	$f1,0($30)
-	ldq	$4,0($30)
-	subq	$0,$4,$0
-	umulh	$16,$0,$1
-	mulq	$16,$0,$2
-	addq	$16,$1,$3
-	bge	$3,.Loop2
-.Loop1:	addq	$2,$16,$2
-	cmpult	$2,$16,$1
-	addq	$3,$1,$3
-	addq	$0,1,$0
-	blt	$3,.Loop1
-.Loop2:	cmpult	$2,$16,$1
-	subq	$0,1,$0
-	subq	$3,$1,$3
-	subq	$2,$16,$2
-	bge	$3,.Loop2
-.Lend:
-	addq	$30,16,$30
-	ret	$31,($26),1
-	.end	__mpn_invert_normalized_limb
-.text
-	.align 1
-invtab:
+	srl	r16,1,r1	C r1 = d/2
+	stq	r1,0(r30)	C pass it to the fp unit through the stack slot
+	ldt	f11,0(r30)
+	cvtqt	f11,f1	C f1 = d/2 as floating point
+	lda	r1,$C36	C address of the 2^63 constant
+	ldt	f10,0(r1)
+	divt	f10,f1,f10	C f10 = 2^63 / (d/2)
+	lda	r2,$invtab-4096	C biased table base; 2*(d >> 52) is >= 4096 for normalized d
+	srl	r16,52,r1	C r1 = top 12 bits of d
+	addq	r1,r1,r1	C scale by 2, the table is built from .word entries
+	addq	r1,r2,r1	C r1 = address of the table entry
+	bic	r1,6,r2	C clear address bits 1-2 for the quadword load
+	ldq	r2,0(r2)	C fetch the quadword holding the entry
+	bic	r1,1,r1	C clear bit 0 of the byte offset
+	extwl	r2,r1,r2	C extract the 16-bit table entry
+	sll	r2,48,r0	C r0 = table entry moved to the top 16 bits
+	umulh	r16,r0,r1	C r1 = high limb of d * r0
+	addq	r16,r1,r3	C r3 = d + r1
+	stq	r3,0(r30)	C pass r3 to the fp unit through the stack slot
+	ldt	f11,0(r30)
+	cvtqt	f11,f1
+	mult	f1,f10,f1	C scale by the quotient computed by divt above
+	cvttq/c	f1,f1	C back to integer, chopped rounding
+	stt	f1,0(r30)
+	ldq	r4,0(r30)	C r4 = integer correction term
+	subq	r0,r4,r0	C apply the correction to r0
+	umulh	r16,r0,r1	C r1 = high limb of d * r0
+	mulq	r16,r0,r2	C r2 = low limb of d * r0
+	addq	r16,r1,r3	C r3 = d + r1
+	bge	r3,$Loop2
+$Loop1:	addq	r2,r16,r2	C add d to the r3:r2 pair and
+	cmpult	r2,r16,r1	C   propagate the carry,
+	addq	r3,r1,r3
+	addq	r0,1,r0	C   incrementing r0 each time,
+	blt	r3,$Loop1	C   while r3 is negative
+$Loop2:	cmpult	r2,r16,r1	C subtract d from the r3:r2 pair
+	subq	r0,1,r0	C   decrementing r0 each time,
+	subq	r3,r1,r3	C   with borrow,
+	subq	r2,r16,r2
+	bge	r3,$Loop2	C   while r3 is non-negative
+$Lend:
+	lda	r30,16(r30)	C release the stack scratch
+	ret	r31,(r26),1
+EPILOGUE(mpn_invert_normalized_limb)
+DATASTART(`$invtab',4)
 	.word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
 	.word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
 	.word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
@@ -352,3 +341,5 @@ invtab:
 	.word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
 	.word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
 	.word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
+DATAEND()
+ASM_END()
diff --git a/mpn/alpha/lshift.asm b/mpn/alpha/lshift.asm
new file mode 100644
index 000000000..de0ce473c
--- /dev/null
+++ b/mpn/alpha/lshift.asm
@@ -0,0 +1,104 @@
+dnl  Alpha mpn_lshift -- Shift a number left.
+
+dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  size	r18
+dnl  cnt	r19
+
+dnl  This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
+dnl  it would take 4 cycles/limb.  It should be possible to get down to 3
+dnl  cycles/limb since both ldq and stq can be paired with the other used
+dnl  instructions.  But there are many restrictions in the 21064 pipeline that
+dnl  make it hard, if not impossible, to get down to 3 cycles/limb:
+
+dnl  1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+dnl  2. Only aligned instruction pairs can be paired.
+dnl  3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	s8addq	r18,r17,r17	C make r17 point at end of s1
+	ldq	r4,-8(r17)	C load first limb
+	subq	r17,8,r17	C step s1_ptr down past the limb just loaded
+	subq	r31,r19,r7	C r7 = -cnt, the count used by the srl shifts below
+	s8addq	r18,r16,r16	C make r16 point at end of RES
+	subq	r18,1,r18	C r18 = size - 1
+	and	r18,4-1,r20	C number of limbs in first loop
+	srl	r4,r7,r0	C compute function result
+
+	beq	r20,$L0	C first loop not needed when (size - 1) % 4 == 0
+	subq	r18,r20,r18	C r18 = limbs left for the 4-way unrolled loop
+
+	ALIGN(8)
+$Loop0:
+	ldq	r3,-8(r17)	C r3 = next lower source limb
+	subq	r16,8,r16	C res_ptr--
+	subq	r17,8,r17	C s1_ptr--
+	subq	r20,1,r20	C decr first-loop cnt
+	sll	r4,r19,r5	C high part: current limb << cnt
+	srl	r3,r7,r6	C low part: bits shifted in from the next limb
+	bis	r3,r3,r4	C next limb becomes the current limb
+	bis	r5,r6,r8	C combine the two parts
+	stq	r8,0(r16)
+	bne	r20,$Loop0
+
+$L0:	beq	r18,$Lend	C no limbs left for the unrolled loop
+
+	ALIGN(8)
+$Loop:	ldq	r3,-8(r17)
+	subq	r16,32,r16	C res_ptr -= 4 limbs
+	subq	r18,4,r18	C decr loop cnt
+	sll	r4,r19,r5
+	srl	r3,r7,r6
+
+	ldq	r4,-16(r17)
+	sll	r3,r19,r1
+	bis	r5,r6,r8
+	stq	r8,24(r16)
+	srl	r4,r7,r2
+
+	ldq	r3,-24(r17)
+	sll	r4,r19,r5
+	bis	r1,r2,r8
+	stq	r8,16(r16)
+	srl	r3,r7,r6
+
+	ldq	r4,-32(r17)
+	sll	r3,r19,r1
+	bis	r5,r6,r8
+	stq	r8,8(r16)
+	srl	r4,r7,r2
+
+	subq	r17,32,r17	C s1_ptr -= 4 limbs
+	bis	r1,r2,r8
+	stq	r8,0(r16)
+
+	bgt	r18,$Loop
+
+$Lend:	sll	r4,r19,r8	C lowest result limb: last source limb << cnt
+	stq	r8,-8(r16)
+	ret	r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/mpn/alpha/lshift.s b/mpn/alpha/lshift.s
deleted file mode 100644
index 6a3e55a93..000000000
--- a/mpn/alpha/lshift.s
+++ /dev/null
@@ -1,109 +0,0 @@
- # Alpha __mpn_lshift --
-
- # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	r16
- # s1_ptr	r17
- # size		r18
- # cnt		r19
-
- # This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
- # it would take 4 cycles/limb.  It should be possible to get down to 3
- # cycles/limb since both ldq and stq can be paired with the other used
- # instructions.  But there are many restrictions in the 21064 pipeline that
- # makes it hard, if not impossible, to get down to 3 cycles/limb:
-
- # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
- # 2. Only aligned instruction pairs can be paired.
- # 3. The store buffer or silo might not be able to deal with the bandwidth.
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_lshift
-	.ent	__mpn_lshift
-__mpn_lshift:
-	.frame	$30,0,$26,0
-
-	s8addq	$18,$17,$17	# make r17 point at end of s1
-	ldq	$4,-8($17)	# load first limb
-	subq	$17,8,$17
-	subq	$31,$19,$7
-	s8addq	$18,$16,$16	# make r16 point at end of RES
-	subq	$18,1,$18
-	and	$18,4-1,$20	# number of limbs in first loop
-	srl	$4,$7,$0	# compute function result
-
-	beq	$20,.L0
-	subq	$18,$20,$18
-
-	.align	3
-.Loop0:
-	ldq	$3,-8($17)
-	subq	$16,8,$16
-	subq	$17,8,$17
-	subq	$20,1,$20
-	sll	$4,$19,$5
-	srl	$3,$7,$6
-	bis	$3,$3,$4
-	bis	$5,$6,$8
-	stq	$8,0($16)
-	bne	$20,.Loop0
-
-.L0:	beq	$18,.Lend
-
-	.align	3
-.Loop:	ldq	$3,-8($17)
-	subq	$16,32,$16
-	subq	$18,4,$18
-	sll	$4,$19,$5
-	srl	$3,$7,$6
-
-	ldq	$4,-16($17)
-	sll	$3,$19,$1
-	bis	$5,$6,$8
-	stq	$8,24($16)
-	srl	$4,$7,$2
-
-	ldq	$3,-24($17)
-	sll	$4,$19,$5
-	bis	$1,$2,$8
-	stq	$8,16($16)
-	srl	$3,$7,$6
-
-	ldq	$4,-32($17)
-	sll	$3,$19,$1
-	bis	$5,$6,$8
-	stq	$8,8($16)
-	srl	$4,$7,$2
-
-	subq	$17,32,$17
-	bis	$1,$2,$8
-	stq	$8,0($16)
-
-	bgt	$18,.Loop
-
-.Lend:	sll	$4,$19,$8
-	stq	$8,-8($16)
-	ret	$31,($26),1
-	.end	__mpn_lshift
diff --git a/mpn/alpha/mul_1.asm b/mpn/alpha/mul_1.asm
new file mode 100644
index 000000000..94cd55c9c
--- /dev/null
+++ b/mpn/alpha/mul_1.asm
@@ -0,0 +1,71 @@
+dnl  Alpha __mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  size	r18
+dnl  s2_limb	r19
+
+dnl  This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl  cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+	ldq	r2,0(r17)	C r2 = s1_limb
+	subq	r18,1,r18	C size--
+	mulq	r2,r19,r3	C r3 = prod_low
+	bic	r31,r31,r4	C clear cy_limb
+	umulh	r2,r19,r0	C r0 = prod_high
+	beq	r18,$Lend1	C jump if size was == 1
+	ldq	r2,8(r17)	C r2 = s1_limb
+	subq	r18,1,r18	C size--
+	stq	r3,0(r16)	C store lowest result limb
+	beq	r18,$Lend2	C jump if size was == 2
+
+	ALIGN(8)
+$Loop:	mulq	r2,r19,r3	C r3 = prod_low
+	addq	r4,r0,r0	C cy_limb = cy_limb + 'cy'
+	subq	r18,1,r18	C size--
+	umulh	r2,r19,r4	C r4 = cy_limb
+	ldq	r2,16(r17)	C r2 = s1_limb
+	addq	r17,8,r17	C s1_ptr++
+	addq	r3,r0,r3	C r3 = cy_limb + prod_low
+	stq	r3,8(r16)
+	cmpult	r3,r0,r0	C r0 = carry from (cy_limb + prod_low)
+	addq	r16,8,r16	C res_ptr++
+	bne	r18,$Loop
+
+$Lend2:	mulq	r2,r19,r3	C r3 = prod_low
+	addq	r4,r0,r0	C cy_limb = cy_limb + 'cy'
+	umulh	r2,r19,r4	C r4 = cy_limb
+	addq	r3,r0,r3	C r3 = cy_limb + prod_low
+	cmpult	r3,r0,r0	C r0 = carry from (cy_limb + prod_low)
+	stq	r3,8(r16)	C store the last result limb
+	addq	r4,r0,r0	C cy_limb = prod_high + cy
+	ret	r31,(r26),1
+$Lend1:	stq	r3,0(r16)	C size == 1: store prod_low, r0 still holds prod_high
+	ret	r31,(r26),1
+EPILOGUE(mpn_mul_1)
+ASM_END()
diff --git a/mpn/alpha/mul_1.s b/mpn/alpha/mul_1.s
deleted file mode 100644
index 470c89368..000000000
--- a/mpn/alpha/mul_1.s
+++ /dev/null
@@ -1,85 +0,0 @@
- # Alpha __mpn_mul_1 -- Multiply a limb vector with a limb and store
- # the result in a second limb vector.
-
- # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	r16
- # s1_ptr	r17
- # size		r18
- # s2_limb	r19
-
- # This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
-
- # To improve performance for long multiplications, we would use
- # 'fetch' for S1 and 'fetch_m' for RES.  It's not obvious how to use
- # these instructions without slowing down the general code: 1. We can
- # only have two prefetches in operation at any time in the Alpha
- # architecture.  2. There will seldom be any special alignment
- # between RES_PTR and S1_PTR.  Maybe we can simply divide the current
- # loop into an inner and outer loop, having the inner loop handle
- # exactly one prefetch block?
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_mul_1
-	.ent	__mpn_mul_1 2
-__mpn_mul_1:
-	.frame	$30,0,$26
-
-	ldq	$2,0($17)	# $2 = s1_limb
-	subq	$18,1,$18	# size--
-	mulq	$2,$19,$3	# $3 = prod_low
-	bic	$31,$31,$4	# clear cy_limb
-	umulh	$2,$19,$0	# $0 = prod_high
-	beq	$18,.Lend1	# jump if size was == 1
-	ldq	$2,8($17)	# $2 = s1_limb
-	subq	$18,1,$18	# size--
-	stq	$3,0($16)
-	beq	$18,.Lend2	# jump if size was == 2
-
-	.align	3
-.Loop:	mulq	$2,$19,$3	# $3 = prod_low
-	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
-	subq	$18,1,$18	# size--
-	umulh	$2,$19,$4	# $4 = cy_limb
-	ldq	$2,16($17)	# $2 = s1_limb
-	addq	$17,8,$17	# s1_ptr++
-	addq	$3,$0,$3	# $3 = cy_limb + prod_low
-	stq	$3,8($16)
-	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
-	addq	$16,8,$16	# res_ptr++
-	bne	$18,.Loop
-
-.Lend2:	mulq	$2,$19,$3	# $3 = prod_low
-	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
-	umulh	$2,$19,$4	# $4 = cy_limb
-	addq	$3,$0,$3	# $3 = cy_limb + prod_low
-	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
-	stq	$3,8($16)
-	addq	$4,$0,$0	# cy_limb = prod_high + cy
-	ret	$31,($26),1
-.Lend1:	stq	$3,0($16)
-	ret	$31,($26),1
-
-	.end	__mpn_mul_1
diff --git a/mpn/alpha/rshift.asm b/mpn/alpha/rshift.asm
new file mode 100644
index 000000000..4c111d237
--- /dev/null
+++ b/mpn/alpha/rshift.asm
@@ -0,0 +1,102 @@
+dnl  Alpha mpn_rshift -- Shift a number right.
+
+dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  size	r18
+dnl  cnt	r19
+
+dnl  This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
+dnl  it would take 4 cycles/limb.  It should be possible to get down to 3
+dnl  cycles/limb since both ldq and stq can be paired with the other used
+dnl  instructions.  But there are many restrictions in the 21064 pipeline that
+dnl  make it hard, if not impossible, to get down to 3 cycles/limb:
+
+dnl  1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+dnl  2. Only aligned instruction pairs can be paired.
+dnl  3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	ldq	r4,0(r17)	C load first limb
+	addq	r17,8,r17	C s1_ptr++
+	subq	r31,r19,r7	C r7 = -cnt, the count for the complementary left shifts
+	subq	r18,1,r18	C size--
+	and	r18,4-1,r20	C number of limbs in first loop
+	sll	r4,r7,r0	C compute function result
+
+	beq	r20,$L0		C skip first loop if size-1 is a multiple of 4
+	subq	r18,r20,r18	C r18 = limbs left for the unrolled loop
+
+	ALIGN(8)
+$Loop0:
+	ldq	r3,0(r17)	C r3 = next limb
+	addq	r16,8,r16	C res_ptr++
+	addq	r17,8,r17	C s1_ptr++
+	subq	r20,1,r20	C one limb less in first loop
+	srl	r4,r19,r5	C r5 = current limb >> cnt
+	sll	r3,r7,r6	C r6 = bits of next limb that move into this result limb
+	bis	r3,r3,r4	C next limb becomes current limb
+	bis	r5,r6,r8	C r8 = combined result limb
+	stq	r8,-8(r16)	C store it, res_ptr was already advanced
+	bne	r20,$Loop0
+
+$L0:	beq	r18,$Lend	C nothing left for the unrolled loop
+
+	ALIGN(8)
+$Loop:	ldq	r3,0(r17)	C unrolled 4 times, r3 and r4 alternate as current and next limb
+	addq	r16,32,r16	C res_ptr += 4 limbs, stores below use negative offsets
+	subq	r18,4,r18	C size -= 4
+	srl	r4,r19,r5
+	sll	r3,r7,r6
+
+	ldq	r4,8(r17)
+	srl	r3,r19,r1
+	bis	r5,r6,r8
+	stq	r8,-32(r16)	C store result limb 0 of this pass
+	sll	r4,r7,r2
+
+	ldq	r3,16(r17)
+	srl	r4,r19,r5
+	bis	r1,r2,r8
+	stq	r8,-24(r16)	C store result limb 1 of this pass
+	sll	r3,r7,r6
+
+	ldq	r4,24(r17)
+	srl	r3,r19,r1
+	bis	r5,r6,r8
+	stq	r8,-16(r16)	C store result limb 2 of this pass
+	sll	r4,r7,r2
+
+	addq	r17,32,r17	C s1_ptr += 4 limbs
+	bis	r1,r2,r8
+	stq	r8,-8(r16)	C store result limb 3 of this pass
+
+	bgt	r18,$Loop
+
+$Lend:	srl	r4,r19,r8	C last result limb = last loaded limb >> cnt
+	stq	r8,0(r16)
+	ret	r31,(r26),1	C return, r0 still holds the result computed at entry
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/mpn/alpha/rshift.s b/mpn/alpha/rshift.s
deleted file mode 100644
index 12a3e369d..000000000
--- a/mpn/alpha/rshift.s
+++ /dev/null
@@ -1,107 +0,0 @@
- # Alpha __mpn_rshift --
-
- # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	r16
- # s1_ptr	r17
- # size		r18
- # cnt		r19
-
- # This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
- # it would take 4 cycles/limb.  It should be possible to get down to 3
- # cycles/limb since both ldq and stq can be paired with the other used
- # instructions.  But there are many restrictions in the 21064 pipeline that
- # makes it hard, if not impossible, to get down to 3 cycles/limb:
-
- # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
- # 2. Only aligned instruction pairs can be paired.
- # 3. The store buffer or silo might not be able to deal with the bandwidth.
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_rshift
-	.ent	__mpn_rshift
-__mpn_rshift:
-	.frame	$30,0,$26,0
-
-	ldq	$4,0($17)	# load first limb
-	addq	$17,8,$17
-	subq	$31,$19,$7
-	subq	$18,1,$18
-	and	$18,4-1,$20	# number of limbs in first loop
-	sll	$4,$7,$0	# compute function result
-
-	beq	$20,.L0
-	subq	$18,$20,$18
-
-	.align	3
-.Loop0:
-	ldq	$3,0($17)
-	addq	$16,8,$16
-	addq	$17,8,$17
-	subq	$20,1,$20
-	srl	$4,$19,$5
-	sll	$3,$7,$6
-	bis	$3,$3,$4
-	bis	$5,$6,$8
-	stq	$8,-8($16)
-	bne	$20,.Loop0
-
-.L0:	beq	$18,.Lend
-
-	.align	3
-.Loop:	ldq	$3,0($17)
-	addq	$16,32,$16
-	subq	$18,4,$18
-	srl	$4,$19,$5
-	sll	$3,$7,$6
-
-	ldq	$4,8($17)
-	srl	$3,$19,$1
-	bis	$5,$6,$8
-	stq	$8,-32($16)
-	sll	$4,$7,$2
-
-	ldq	$3,16($17)
-	srl	$4,$19,$5
-	bis	$1,$2,$8
-	stq	$8,-24($16)
-	sll	$3,$7,$6
-
-	ldq	$4,24($17)
-	srl	$3,$19,$1
-	bis	$5,$6,$8
-	stq	$8,-16($16)
-	sll	$4,$7,$2
-
-	addq	$17,32,$17
-	bis	$1,$2,$8
-	stq	$8,-8($16)
-
-	bgt	$18,.Loop
-
-.Lend:	srl	$4,$19,$8
-	stq	$8,0($16)
-	ret	$31,($26),1
-	.end	__mpn_rshift
diff --git a/mpn/alpha/sub_n.asm b/mpn/alpha/sub_n.asm
new file mode 100644
index 000000000..e227af553
--- /dev/null
+++ b/mpn/alpha/sub_n.asm
@@ -0,0 +1,114 @@
+dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  s2_ptr	r18
+dnl  size	r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	ldq	r3,0(r17)	C r3 = first s1 limb
+	ldq	r4,0(r18)	C r4 = first s2 limb
+
+	subq	r19,1,r19	C size--
+	and	r19,4-1,r2	C number of limbs in first loop
+	bis	r31,r31,r0	C clear cy
+	beq	r2,$L0		C if size-1 is a multiple of 4, skip first loop
+
+	subq	r19,r2,r19	C r19 = limbs left for the unrolled loop
+
+$Loop0:	subq	r2,1,r2		C one limb less in first loop
+	ldq	r5,8(r17)	C preload next s1 limb
+	addq	r4,r0,r4	C r4 = s2 limb + cy
+	ldq	r6,8(r18)	C preload next s2 limb
+	cmpult	r4,r0,r1	C r1 = carry from (s2 limb + cy)
+	subq	r3,r4,r4	C r4 = s1 limb - (s2 limb + cy)
+	cmpult	r3,r4,r0	C r0 = borrow from the subtraction
+	stq	r4,0(r16)	C store difference limb
+	bis	r0,r1,r0	C cy = carry or borrow
+
+	addq	r17,8,r17	C s1_ptr++
+	addq	r18,8,r18	C s2_ptr++
+	bis	r5,r5,r3	C preloaded s1 limb becomes current
+	bis	r6,r6,r4	C preloaded s2 limb becomes current
+	addq	r16,8,r16	C res_ptr++
+	bne	r2,$Loop0
+
+$L0:	beq	r19,$Lend	C nothing left for the unrolled loop
+
+	ALIGN(8)
+$Loop:	subq	r19,4,r19	C size -= 4, four limbs per pass
+
+	ldq	r5,8(r17)	C limb 0 uses r3,r4 and preloads r5,r6
+	addq	r4,r0,r4
+	ldq	r6,8(r18)
+	cmpult	r4,r0,r1
+	subq	r3,r4,r4
+	cmpult	r3,r4,r0
+	stq	r4,0(r16)
+	bis	r0,r1,r0
+
+	ldq	r3,16(r17)	C limb 1 uses r5,r6 and preloads r3,r4
+	addq	r6,r0,r6
+	ldq	r4,16(r18)
+	cmpult	r6,r0,r1
+	subq	r5,r6,r6
+	cmpult	r5,r6,r0
+	stq	r6,8(r16)
+	bis	r0,r1,r0
+
+	ldq	r5,24(r17)	C limb 2 uses r3,r4 and preloads r5,r6
+	addq	r4,r0,r4
+	ldq	r6,24(r18)
+	cmpult	r4,r0,r1
+	subq	r3,r4,r4
+	cmpult	r3,r4,r0
+	stq	r4,16(r16)
+	bis	r0,r1,r0
+
+	ldq	r3,32(r17)	C limb 3 uses r5,r6 and preloads r3,r4 for the next pass or the tail
+	addq	r6,r0,r6
+	ldq	r4,32(r18)
+	cmpult	r6,r0,r1
+	subq	r5,r6,r6
+	cmpult	r5,r6,r0
+	stq	r6,24(r16)
+	bis	r0,r1,r0
+
+	addq	r17,32,r17	C s1_ptr += 4 limbs
+	addq	r18,32,r18	C s2_ptr += 4 limbs
+	addq	r16,32,r16	C res_ptr += 4 limbs
+	bne	r19,$Loop
+
+$Lend:	addq	r4,r0,r4	C last limb, already loaded in r3,r4
+	cmpult	r4,r0,r1	C r1 = carry from (s2 limb + cy)
+	subq	r3,r4,r4	C r4 = s1 limb - (s2 limb + cy)
+	cmpult	r3,r4,r0	C r0 = borrow from the subtraction
+	stq	r4,0(r16)	C store last difference limb
+	bis	r0,r1,r0	C r0 = final cy
+	ret	r31,(r26),1	C return with the final cy in r0
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/mpn/alpha/sub_n.s b/mpn/alpha/sub_n.s
deleted file mode 100644
index 3c90c1169..000000000
--- a/mpn/alpha/sub_n.s
+++ /dev/null
@@ -1,120 +0,0 @@
- # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
- # store difference in a third limb vector.
-
- # Copyright (C) 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	$16
- # s1_ptr	$17
- # s2_ptr	$18
- # size		$19
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_sub_n
-	.ent	__mpn_sub_n
-__mpn_sub_n:
-	.frame	$30,0,$26,0
-
-	ldq	$3,0($17)
-	ldq	$4,0($18)
-
-	subq	$19,1,$19
-	and	$19,4-1,$2	# number of limbs in first loop
-	bis	$31,$31,$0
-	beq	$2,.L0		# if multiple of 4 limbs, skip first loop
-
-	subq	$19,$2,$19
-
-.Loop0:	subq	$2,1,$2
-	ldq	$5,8($17)
-	addq	$4,$0,$4
-	ldq	$6,8($18)
-	cmpult	$4,$0,$1
-	subq	$3,$4,$4
-	cmpult	$3,$4,$0
-	stq	$4,0($16)
-	or	$0,$1,$0
-
-	addq	$17,8,$17
-	addq	$18,8,$18
-	bis	$5,$5,$3
-	bis	$6,$6,$4
-	addq	$16,8,$16
-	bne	$2,.Loop0
-
-.L0:	beq	$19,.Lend
-
-	.align	3
-.Loop:	subq	$19,4,$19
-
-	ldq	$5,8($17)
-	addq	$4,$0,$4
-	ldq	$6,8($18)
-	cmpult	$4,$0,$1
-	subq	$3,$4,$4
-	cmpult	$3,$4,$0
-	stq	$4,0($16)
-	or	$0,$1,$0
-
-	ldq	$3,16($17)
-	addq	$6,$0,$6
-	ldq	$4,16($18)
-	cmpult	$6,$0,$1
-	subq	$5,$6,$6
-	cmpult	$5,$6,$0
-	stq	$6,8($16)
-	or	$0,$1,$0
-
-	ldq	$5,24($17)
-	addq	$4,$0,$4
-	ldq	$6,24($18)
-	cmpult	$4,$0,$1
-	subq	$3,$4,$4
-	cmpult	$3,$4,$0
-	stq	$4,16($16)
-	or	$0,$1,$0
-
-	ldq	$3,32($17)
-	addq	$6,$0,$6
-	ldq	$4,32($18)
-	cmpult	$6,$0,$1
-	subq	$5,$6,$6
-	cmpult	$5,$6,$0
-	stq	$6,24($16)
-	or	$0,$1,$0
-
-	addq	$17,32,$17
-	addq	$18,32,$18
-	addq	$16,32,$16
-	bne	$19,.Loop
-
-.Lend:	addq	$4,$0,$4
-	cmpult	$4,$0,$1
-	subq	$3,$4,$4
-	cmpult	$3,$4,$0
-	stq	$4,0($16)
-	or	$0,$1,$0
-	ret	$31,($26),1
-
-	.end	__mpn_sub_n
diff --git a/mpn/alpha/submul_1.asm b/mpn/alpha/submul_1.asm
new file mode 100644
index 000000000..5122d9e80
--- /dev/null
+++ b/mpn/alpha/submul_1.asm
@@ -0,0 +1,87 @@
+dnl  Alpha mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Library General Public License as published by
+dnl  the Free Software Foundation; either version 2 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Library General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  size	r18
+dnl  s2_limb	r19
+
+dnl  This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl  cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+	ldq	r2,0(r17)	C r2 = s1_limb
+	addq	r17,8,r17	C s1_ptr++
+	subq	r18,1,r18	C size--
+	mulq	r2,r19,r3	C r3 = prod_low
+	ldq	r5,0(r16)	C r5 = *res_ptr
+	umulh	r2,r19,r0	C r0 = prod_high
+	beq	r18,$Lend1	C jump if size was == 1
+	ldq	r2,0(r17)	C r2 = s1_limb
+	addq	r17,8,r17	C s1_ptr++
+	subq	r18,1,r18	C size--
+	subq	r5,r3,r3	C r3 = *res_ptr - prod_low
+	cmpult	r5,r3,r4	C r4 = borrow from the subtraction
+	stq	r3,0(r16)	C store first result limb
+	addq	r16,8,r16	C res_ptr++
+	beq	r18,$Lend2	C jump if size was == 2
+
+	ALIGN(8)
+$Loop:	mulq	r2,r19,r3	C r3 = prod_low
+	ldq	r5,0(r16)	C r5 = *res_ptr
+	addq	r4,r0,r0	C cy_limb = cy_limb + 'cy'
+	subq	r18,1,r18	C size--
+	umulh	r2,r19,r4	C r4 = cy_limb
+	ldq	r2,0(r17)	C r2 = s1_limb
+	addq	r17,8,r17	C s1_ptr++
+	addq	r3,r0,r3	C r3 = cy_limb + prod_low
+	cmpult	r3,r0,r0	C r0 = carry from (cy_limb + prod_low)
+	subq	r5,r3,r3	C r3 = *res_ptr - (cy_limb + prod_low)
+	cmpult	r5,r3,r5	C r5 = borrow from the subtraction
+	stq	r3,0(r16)	C store result limb
+	addq	r16,8,r16	C res_ptr++
+	addq	r5,r0,r0	C combine carries
+	bne	r18,$Loop	C repeat until size reaches 0
+
+$Lend2:	mulq	r2,r19,r3	C r3 = prod_low
+	ldq	r5,0(r16)	C r5 = *res_ptr
+	addq	r4,r0,r0	C cy_limb = cy_limb + 'cy'
+	umulh	r2,r19,r4	C r4 = cy_limb
+	addq	r3,r0,r3	C r3 = cy_limb + prod_low
+	cmpult	r3,r0,r0	C r0 = carry from (cy_limb + prod_low)
+	subq	r5,r3,r3	C r3 = *res_ptr - (cy_limb + prod_low)
+	cmpult	r5,r3,r5	C r5 = borrow from the subtraction
+	stq	r3,0(r16)	C store the last result limb
+	addq	r5,r0,r0	C combine carries
+	addq	r4,r0,r0	C cy_limb = prod_high + cy
+	ret	r31,(r26),1	C return with cy_limb in r0
+$Lend1:	subq	r5,r3,r3	C size was 1: r3 = *res_ptr - prod_low
+	cmpult	r5,r3,r5	C r5 = borrow from the subtraction
+	stq	r3,0(r16)	C store the only result limb
+	addq	r0,r5,r0	C cy_limb = prod_high + borrow
+	ret	r31,(r26),1	C return with cy_limb in r0
+EPILOGUE(mpn_submul_1)
+ASM_END()
diff --git a/mpn/alpha/submul_1.s b/mpn/alpha/submul_1.s
deleted file mode 100644
index 319c10f07..000000000
--- a/mpn/alpha/submul_1.s
+++ /dev/null
@@ -1,92 +0,0 @@
- # Alpha __mpn_submul_1 -- Multiply a limb vector with a limb and
- # subtract the result from a second limb vector.
-
- # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
-
- # This file is part of the GNU MP Library.
-
- # The GNU MP Library is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Library General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or (at your
- # option) any later version.
-
- # The GNU MP Library is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
- # License for more details.
-
- # You should have received a copy of the GNU Library General Public License
- # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
- # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- # MA 02111-1307, USA.
-
-
- # INPUT PARAMETERS
- # res_ptr	r16
- # s1_ptr	r17
- # size		r18
- # s2_limb	r19
-
- # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	__mpn_submul_1
-	.ent	__mpn_submul_1 2
-__mpn_submul_1:
-	.frame	$30,0,$26
-
-	ldq	$2,0($17)	# $2 = s1_limb
-	addq	$17,8,$17	# s1_ptr++
-	subq	$18,1,$18	# size--
-	mulq	$2,$19,$3	# $3 = prod_low
-	ldq	$5,0($16)	# $5 = *res_ptr
-	umulh	$2,$19,$0	# $0 = prod_high
-	beq	$18,.Lend1	# jump if size was == 1
-	ldq	$2,0($17)	# $2 = s1_limb
-	addq	$17,8,$17	# s1_ptr++
-	subq	$18,1,$18	# size--
-	subq	$5,$3,$3
-	cmpult	$5,$3,$4
-	stq	$3,0($16)
-	addq	$16,8,$16	# res_ptr++
-	beq	$18,.Lend2	# jump if size was == 2
-
-	.align	3
-.Loop:	mulq	$2,$19,$3	# $3 = prod_low
-	ldq	$5,0($16)	# $5 = *res_ptr
-	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
-	subq	$18,1,$18	# size--
-	umulh	$2,$19,$4	# $4 = cy_limb
-	ldq	$2,0($17)	# $2 = s1_limb
-	addq	$17,8,$17	# s1_ptr++
-	addq	$3,$0,$3	# $3 = cy_limb + prod_low
-	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
-	subq	$5,$3,$3
-	cmpult	$5,$3,$5
-	stq	$3,0($16)
-	addq	$16,8,$16	# res_ptr++
-	addq	$5,$0,$0	# combine carries
-	bne	$18,.Loop
-
-.Lend2:	mulq	$2,$19,$3	# $3 = prod_low
-	ldq	$5,0($16)	# $5 = *res_ptr
-	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
-	umulh	$2,$19,$4	# $4 = cy_limb
-	addq	$3,$0,$3	# $3 = cy_limb + prod_low
-	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
-	subq	$5,$3,$3
-	cmpult	$5,$3,$5
-	stq	$3,0($16)
-	addq	$5,$0,$0	# combine carries
-	addq	$4,$0,$0	# cy_limb = prod_high + cy
-	ret	$31,($26),1
-.Lend1:	subq	$5,$3,$3
-	cmpult	$5,$3,$5
-	stq	$3,0($16)
-	addq	$0,$5,$0
-	ret	$31,($26),1
-
-	.end	__mpn_submul_1
diff --git a/mpn/alpha/unicos.m4 b/mpn/alpha/unicos.m4
new file mode 100644
index 000000000..93d97b7e1
--- /dev/null
+++ b/mpn/alpha/unicos.m4
@@ -0,0 +1,41 @@
+divert(-1)
+dnl  ASM_START expands to an .ident directive.
+define(`ASM_START',
+	`.ident	dummy')
+dnl  X(n) expands to ^Xn.  INT64(name,value) emits a data psect named name@crud holding one .quad.
+define(`X',`^X$1')
+define(`INT64',
+	`dnl
+	.psect	$1@crud,data
+$1:	.quad	$2
+	.endp')
+dnl  PROLOGUE(name) emits .stack, opens the psect name@code and defines the label name::
+define(`PROLOGUE',
+	`dnl
+	.stack	192		; What does this mean?  Only Cray knows.
+	.psect	$1@code,code,cache
+$1::')
+define(`PROLOGUE_GP', `PROLOGUE($1)')
+dnl  EPILOGUE emits .endp, presumably closing the psect opened by PROLOGUE.
+define(`EPILOGUE',
+	`dnl
+	.endp')
+dnl  DATASTART(name) opens the data psect name@crud and defines the label name; DATAEND emits .endp.
+define(`DATASTART',
+	`dnl
+	.psect	$1@crud,data
+$1:')
+define(`DATAEND',
+	`dnl
+	.endp')
+dnl  ASM_END emits the .end directive.
+define(`ASM_END',
+	`dnl
+	.end')
+
+define(`unop',`bis r31,r31,r31') ; Unicos assembler lacks unop
+
+define(`ALIGN',`')		; Unicos assembler seems to align using garbage
+
+divert
+
author	tege <tege@gmplib.org>	2000-03-17 07:08:03 +0100
committer	tege <tege@gmplib.org>	2000-03-17 07:08:03 +0100
commit	106b678e54ca07182002077f4421890710626aa4 (patch)
tree	d3fe95a81926ddd8bc57a083ec50485ffaffee65 /mpn/alpha
parent	ef6177983d51165d7e0ef15a52b63fea72169805 (diff)
download	gmp-106b678e54ca07182002077f4421890710626aa4.tar.gz