Moved from mpn/hppa.

author: tege <tege@gmplib.org> 2002-03-26 17:30:50 +0100
committer: tege <tege@gmplib.org> 2002-03-26 17:30:50 +0100
commit: 59ccffe8a22a0c991f9c44a8c17db500f9b08a51 (patch)
tree: f2f02ca330e0de3bd9b186f54071a1c8f1b195a5 /mpn/pa32
parent: 337d7796e7aadc1055f2e2e4c89f5849f6a1f994 (diff)
download: gmp-59ccffe8a22a0c991f9c44a8c17db500f9b08a51.tar.gz
24 files changed, 2330 insertions, 0 deletions
diff --git a/mpn/pa32/README b/mpn/pa32/README
new file mode 100644
index 000000000..b7c26318d
--- /dev/null
+++ b/mpn/pa32/README
@@ -0,0 +1,143 @@
+Copyright 1996, 1999, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+
+
+
+
+
+
+This directory contains mpn functions for various HP PA-RISC chips.  Code
+that runs faster on the PA7100 and later implementations, is in the pa7100
+directory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+  Load and Store timing
+
+On the PA7000 no memory instructions can issue the two cycles after a store.
+For the PA7100, this is reduced to one cycle.
+
+The PA7100 has a lookup-free cache, so it helps to schedule loads and the
+dependent instruction really far from each other.
+
+STATUS
+
+1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the
+   instructions below (but some sw pipelining is needed to avoid the
+   xmpyu-fstds delay):
+
+	fldds	s1_ptr
+
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+
+	addc
+	stws	res_ptr
+	addc
+	stws	res_ptr
+
+	addib	Loop
+
+2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb
+   (asymptotically) on the PA7100, using the instructions below.  With proper
+   sw pipelining and the unrolling level below, the speed becomes 8
+   cycles/limb.
+
+	fldds	s1_ptr
+	fldds	s1_ptr
+
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	addc
+	addc
+	addc
+	addc
+	addc	%r0,%r0,cy-limb
+
+	ldws	res_ptr
+	ldws	res_ptr
+	ldws	res_ptr
+	ldws	res_ptr
+	add
+	stws	res_ptr
+	addc
+	stws	res_ptr
+	addc
+	stws	res_ptr
+	addc
+	stws	res_ptr
+
+	addib
+
+3. For the PA8000 we have to stick to using 32-bit limbs before compiler
+   support emerges.  But we want to use 64-bit operations whenever possible,
+   in particular for loads and stores.  It is possible to handle mpn_add_n
+   efficiently by rotating (when s1/s2 are aligned), masking+bit field
+   inserting when (they are not).  The speed should double compared to the
+   code used today.
+
+
+
+
+LABEL SYNTAX
+
+The HP-UX assembler takes labels starting in column 0 with no colon,
+
+	L$loop  ldws,mb -4(0,%r25),%r22
+
+Gas on hppa GNU/Linux however requires a colon,
+
+	L$loop: ldws,mb -4(0,%r25),%r22
+
+Fortunately both accept a ".label" pseudo-op,
+
+	        .label  L$loop
+                ldws,mb -4(0,%r25),%r22
+
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/pa32/add_n.asm b/mpn/pa32/add_n.asm
new file mode 100644
index 000000000..9b201c445
--- /dev/null
+++ b/mpn/pa32/add_n.asm
@@ -0,0 +1,54 @@
+dnl  HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	gr26
+C s1_ptr	gr25
+C s2_ptr	gr24
+C size		gr23
+
+C One might want to unroll this as for other processors, but it turns out that
+C the data cache contention after a store makes such unrolling useless.  We
+C can't come under 5 cycles/limb anyway.
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,=		-1,%r23,L(end)	C check for (SIZE == 1)
+	 add		%r20,%r19,%r28	C add first limbs ignoring cy
+
+	.label	L(loop)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,<>	-1,%r23,L(loop)
+	 addc		%r20,%r19,%r28
+
+	.label	L(end)
+	stws		%r28,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r0,%r28
+EPILOGUE()
diff --git a/mpn/pa32/gmp-mparam.h b/mpn/pa32/gmp-mparam.h
new file mode 100644
index 000000000..2fec625c3
--- /dev/null
+++ b/mpn/pa32/gmp-mparam.h
@@ -0,0 +1,56 @@
+/* HP-PA 1.0 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+
+/* These values are for the PA7100 using GCC.  */
+/* Generated by tuneup.c, 2000-10-27. */
+
+#ifndef MUL_KARATSUBA_THRESHOLD
+#define MUL_KARATSUBA_THRESHOLD   30
+#endif
+#ifndef MUL_TOOM3_THRESHOLD
+#define MUL_TOOM3_THRESHOLD      141
+#endif
+
+#ifndef SQR_KARATSUBA_THRESHOLD
+#define SQR_KARATSUBA_THRESHOLD   59
+#endif
+#ifndef SQR_TOOM3_THRESHOLD
+#define SQR_TOOM3_THRESHOLD      177
+#endif
+
+#ifndef DIV_DC_THRESHOLD
+#define DIV_DC_THRESHOLD             108
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD            18
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD       46
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD          33
+#endif
diff --git a/mpn/pa32/hppa1_1/addmul_1.asm b/mpn/pa32/hppa1_1/addmul_1.asm
new file mode 100644
index 000000000..40fefe505
--- /dev/null
+++ b/mpn/pa32/hppa1_1/addmul_1.asm
@@ -0,0 +1,98 @@
+dnl  HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	r26
+C s1_ptr	r25
+C size		r24
+C s2_limb	r23
+
+C This runs at 11 cycles/limb on a PA7000.  With the used instructions, it can
+C not become faster due to data cache contention after a store.  On the PA7100
+C it runs at 10 cycles/limb.
+
+C There are some ideas described in mul_1.asm that applies to this code too.
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+C	.callinfo	frame=64,no_calls
+
+	ldo		64(%r30),%r30
+	fldws,ma	4(%r25),%fr5
+	stw		%r23,-16(%r30)		C move s2_limb ...
+	addib,=		-1,%r24,L(just_one_limb)
+	 fldws		-16(%r30),%fr4		C ... into fr4
+	add		%r0,%r0,%r0		C clear carry
+	xmpyu		%fr4,%fr5,%fr6
+	fldws,ma	4(%r25),%fr7
+	fstds		%fr6,-16(%r30)
+	xmpyu		%fr4,%fr7,%fr8
+	ldw		-12(%r30),%r19		C least significant limb in product
+	ldw		-16(%r30),%r28
+
+	fstds		%fr8,-16(%r30)
+	addib,=		-1,%r24,L(end)
+	 ldw		-12(%r30),%r1
+
+C Main loop
+	.label	L(loop)
+	ldws		0(%r26),%r29
+	fldws,ma	4(%r25),%fr5
+	add		%r29,%r19,%r19
+	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		-16(%r30),%r28
+	fstds		%fr6,-16(%r30)
+	addc		%r0,%r28,%r28
+	addib,<>	-1,%r24,L(loop)
+	 ldw		-12(%r30),%r1
+
+	.label	L(end)
+	ldw		0(%r26),%r29
+	add		%r29,%r19,%r19
+	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	ldw		-16(%r30),%r28
+	ldws		0(%r26),%r29
+	addc		%r0,%r28,%r28
+	add		%r29,%r19,%r19
+	stws,ma		%r19,4(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+	.label	L(just_one_limb)
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		0(%r26),%r29
+	fstds		%fr6,-16(%r30)
+	ldw		-12(%r30),%r1
+	ldw		-16(%r30),%r28
+	add		%r29,%r1,%r19
+	stw		%r19,0(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/gmp-mparam.h b/mpn/pa32/hppa1_1/gmp-mparam.h
new file mode 100644
index 000000000..f14ac6643
--- /dev/null
+++ b/mpn/pa32/hppa1_1/gmp-mparam.h
@@ -0,0 +1,63 @@
+/* HP-PA 1.1 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2002-03-07, gcc 2.8 (pa7100/100MHz) */
+
+#define MUL_KARATSUBA_THRESHOLD          30
+#define MUL_TOOM3_THRESHOLD             141
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_KARATSUBA_THRESHOLD          55
+#define SQR_TOOM3_THRESHOLD             185
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 95
+#define POWM_THRESHOLD                  150
+
+#define GCD_ACCEL_THRESHOLD               3
+#define GCDEXT_THRESHOLD                  0  /* always */
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           3
+#define DIVREM_1_UNNORM_THRESHOLD         6
+#define MOD_1_NORM_THRESHOLD              3
+#define MOD_1_UNNORM_THRESHOLD            6
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             13
+#define GET_STR_PRECOMPUTE_THRESHOLD     23
+#define SET_STR_THRESHOLD              6589
+
+#define MUL_FFT_TABLE  { 592, 1440, 2688, 5632, 14336, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          608
+#define MUL_FFT_THRESHOLD              5888
+
+#define SQR_FFT_TABLE  { 624, 1504, 2688, 6656, 18432, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          640
+#define SQR_FFT_THRESHOLD              5376
diff --git a/mpn/pa32/hppa1_1/mul_1.asm b/mpn/pa32/hppa1_1/mul_1.asm
new file mode 100644
index 000000000..bc336c55e
--- /dev/null
+++ b/mpn/pa32/hppa1_1/mul_1.asm
@@ -0,0 +1,94 @@
+dnl  HP-PA 1.1 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl  result in a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	r26
+C s1_ptr	r25
+C size		r24
+C s2_limb	r23
+
+C This runs at 9 cycles/limb on a PA7000.  With the used instructions, it can
+C not become faster due to data cache contention after a store.  On the PA7100
+C it runs at 7 cycles/limb.
+
+C We could use fldds to read two limbs at a time from the S1 array, and that
+C could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
+C PA7100, respectively.  We don't do that since it does not seem worth the
+C (alignment) troubles...
+
+C At least the PA7100 is rumored to be able to deal with cache-misses without
+C stalling instruction issue.  If this is true, and the cache is actually also
+C lockup-free, we should use a deeper software pipeline, and load from S1 very
+C early!  (The loads and stores to -12(sp) will surely be in the cache.)
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+C	.callinfo	frame=64,no_calls
+
+	ldo		64(%r30),%r30
+	fldws,ma	4(%r25),%fr5
+	stw		%r23,-16(%r30)		C move s2_limb ...
+	addib,=		-1,%r24,L(just_one_limb)
+	 fldws		-16(%r30),%fr4		C ... into fr4
+	add		%r0,%r0,%r0		C clear carry
+	xmpyu		%fr4,%fr5,%fr6
+	fldws,ma	4(%r25),%fr7
+	fstds	 	%fr6,-16(%r30)
+	xmpyu		%fr4,%fr7,%fr8
+	ldw		-12(%r30),%r19		C least significant limb in product
+	ldw		-16(%r30),%r28
+
+	fstds		%fr8,-16(%r30)
+	addib,=		-1,%r24,L(end)
+	 ldw		-12(%r30),%r1
+
+C Main loop
+	.label	L(loop)
+	fldws,ma	4(%r25),%fr5
+	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		-16(%r30),%r28
+	fstds		%fr6,-16(%r30)
+	addib,<>	-1,%r24,L(loop)
+	 ldw		-12(%r30),%r1
+
+	.label	L(end)
+	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	ldw		-16(%r30),%r28
+	stws,ma		%r19,4(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+	.label	L(just_one_limb)
+	xmpyu		%fr4,%fr5,%fr6
+	fstds		%fr6,-16(%r30)
+	ldw		-16(%r30),%r28
+	ldo		-64(%r30),%r30
+	bv		0(%r2)
+	 fstws		%fr6R,0(%r26)
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/add_n.asm b/mpn/pa32/hppa1_1/pa7100/add_n.asm
new file mode 100644
index 000000000..e515456b6
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/add_n.asm
@@ -0,0 +1,74 @@
+dnl  HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.  Optimized for the PA7100, where is runs at
+dnl  4.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	r26
+C s1_ptr	r25
+C s2_ptr	r24
+C size		r23
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,<=	-5,%r23,L(rest)
+	 add		%r20,%r19,%r28	C add first limbs ignoring cy
+
+	.label	L(loop)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addc		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addc		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addc		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-4,%r23,L(loop)
+	addc		%r20,%r19,%r28
+
+	.label	L(rest)
+	addib,=		4,%r23,L(end)
+	nop
+
+	.label	L(eloop)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-1,%r23,L(eloop)
+	addc		%r20,%r19,%r28
+
+	.label	L(end)
+	stws		%r28,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r0,%r28
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/addmul_1.asm b/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
new file mode 100644
index 000000000..58290c54b
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
@@ -0,0 +1,192 @@
+dnl  HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl  add the result to a second limb vector.
+
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`res_ptr',`%r26')
+define(`s1_ptr',`%r25')
+define(`size',`%r24')
+define(`s2_limb',`%r23')
+
+define(`cylimb',`%r28')
+define(`s0',`%r19')
+define(`s1',`%r20')
+define(`s2',`%r3')
+define(`s3',`%r4')
+define(`lo0',`%r21')
+define(`lo1',`%r5')
+define(`lo2',`%r6')
+define(`lo3',`%r7')
+define(`hi0',`%r22')
+define(`hi1',`%r23')				C safe to reuse
+define(`hi2',`%r29')
+define(`hi3',`%r1')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+C	.callinfo	frame=128,no_calls
+
+	ldo	128(%r30),%r30
+	stws	s2_limb,-16(%r30)
+	add	 %r0,%r0,cylimb			C clear cy and cylimb
+	addib,<	-4,size,L(few_limbs)
+	fldws	-16(%r30),%fr31R
+
+	ldo	-112(%r30),%r31
+	stw	%r3,-96(%r30)
+	stw	%r4,-92(%r30)
+	stw	%r5,-88(%r30)
+	stw	%r6,-84(%r30)
+	stw	%r7,-80(%r30)
+
+	bb,>=,n	 s1_ptr,29,L(0)
+
+	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r31)
+	ldws	-16(%r31),cylimb
+	ldws	-12(%r31),lo0
+	add	 s0,lo0,s0
+	addib,< -1,size,L(few_limbs)
+	stws,ma	 s0,4(res_ptr)
+
+C start software pipeline ----------------------------------------------------
+	.label	L(0)
+	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	xmpyu	 %fr4L,%fr31R,%fr5
+	xmpyu	 %fr4R,%fr31R,%fr6
+	xmpyu	 %fr8L,%fr31R,%fr9
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	fstds	 %fr6,-8(%r31)
+	fstds	 %fr9,0(%r31)
+	fstds	 %fr10,8(%r31)
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	addc	 lo1,hi0,lo1
+	addc	 lo2,hi1,lo2
+	addc	 lo3,hi2,lo3
+
+	addib,<	 -4,size,L(end)
+	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
+C main loop ------------------------------------------------------------------
+	.label	L(loop)
+	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4L,%fr31R,%fr5
+	ldws	 4(res_ptr),s1
+	xmpyu	 %fr4R,%fr31R,%fr6
+	ldws	 8(res_ptr),s2
+	xmpyu	 %fr8L,%fr31R,%fr9
+	ldws	12(res_ptr),s3
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	add	 s0,lo0,s0
+	fstds	 %fr6,-8(%r31)
+	addc	 s1,lo1,s1
+	fstds	 %fr9,0(%r31)
+	addc	 s2,lo2,s2
+	fstds	 %fr10,8(%r31)
+	addc	 s3,lo3,s3
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	stws,ma	 s0,4(res_ptr)
+	addc	 lo1,hi0,lo1
+	stws,ma	 s1,4(res_ptr)
+	addc	 lo2,hi1,lo2
+	stws,ma	 s2,4(res_ptr)
+	addc	 lo3,hi2,lo3
+	stws,ma	 s3,4(res_ptr)
+
+	addib,>= -4,size,L(loop)
+	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
+C finish software pipeline ---------------------------------------------------
+	.label	L(end)
+	ldws	 0(res_ptr),s0
+	ldws	 4(res_ptr),s1
+	ldws	 8(res_ptr),s2
+	ldws	12(res_ptr),s3
+
+	add	 s0,lo0,s0
+	stws,ma	 s0,4(res_ptr)
+	addc	 s1,lo1,s1
+	stws,ma	 s1,4(res_ptr)
+	addc	 s2,lo2,s2
+	stws,ma	 s2,4(res_ptr)
+	addc	 s3,lo3,s3
+	stws,ma	 s3,4(res_ptr)
+
+C restore callee-saves registers ---------------------------------------------
+	ldw	-96(%r30),%r3
+	ldw	-92(%r30),%r4
+	ldw	-88(%r30),%r5
+	ldw	-84(%r30),%r6
+	ldw	-80(%r30),%r7
+
+	.label	L(few_limbs)
+	addib,=,n 4,size,L(ret)
+
+	.label	L(loop2)
+	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r30)
+	ldws	-16(%r30),hi0
+	ldws	-12(%r30),lo0
+	addc	 lo0,cylimb,lo0
+	addc	 %r0,hi0,cylimb
+	add	 s0,lo0,s0
+	stws,ma	 s0,4(res_ptr)
+	addib,<> -1,size,L(loop2)
+	nop
+
+	.label	L(ret)
+	addc	 %r0,cylimb,cylimb
+	bv	 0(%r2)
+	ldo	 -128(%r30),%r30
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/pa32/hppa1_1/pa7100/lshift.asm b/mpn/pa32/hppa1_1/pa7100/lshift.asm
new file mode 100644
index 000000000..336b0daaa
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/lshift.asm
@@ -0,0 +1,86 @@
+dnl  HP-PA  mpn_lshift -- Shift a number left.
+dnl  Optimized for the PA7100, where is runs at 3.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	r26
+C s_ptr		r25
+C size		r24
+C cnt		r23
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	sh2add		%r24,%r25,%r25
+	sh2add		%r24,%r26,%r26
+	ldws,mb		-4(0,%r25),%r22
+	subi		32,%r23,%r1
+	mtsar		%r1
+	addib,=		-1,%r24,L(0004)
+	vshd		%r0,%r22,%r28		C compute carry out limb
+	ldws,mb		-4(0,%r25),%r29
+	addib,<=	-5,%r24,L(rest)
+	vshd		%r22,%r29,%r20
+
+	.label	L(loop)
+	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	addib,>		-4,%r24,L(loop)
+	vshd		%r22,%r29,%r20
+
+	.label	L(rest)
+	addib,=		4,%r24,L(end1)
+	nop
+
+	.label	L(eloop)
+	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	addib,<=	-1,%r24,L(end2)
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	addib,>		-1,%r24,L(eloop)
+	vshd		%r22,%r29,%r20
+
+	.label	L(end1)
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+
+	.label	L(end2)
+	stws,mb		%r20,-4(0,%r26)
+
+	.label	L(0004)
+	vshd		%r22,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/rshift.asm b/mpn/pa32/hppa1_1/pa7100/rshift.asm
new file mode 100644
index 000000000..17ac0eecb
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/rshift.asm
@@ -0,0 +1,83 @@
+dnl  HP-PA  mpn_rshift -- Shift a number right.
+dnl  Optimized for the PA7100, where is runs at 3.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	r26
+C s_ptr		r25
+C size		r24
+C cnt		r23
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	ldws,ma		4(0,%r25),%r22
+	mtsar		%r23
+	addib,=		-1,%r24,L(0004)
+	vshd		%r22,%r0,%r28		C compute carry out limb
+	ldws,ma		4(0,%r25),%r29
+	addib,<=	-5,%r24,L(rest)
+	vshd		%r29,%r22,%r20
+
+	.label	L(loop)
+	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r29,%r22,%r20
+	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	addib,>		-4,%r24,L(loop)
+	vshd		%r29,%r22,%r20
+
+	.label	L(rest)
+	addib,=		4,%r24,L(end1)
+	nop
+
+	.label	L(eloop)
+	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	addib,<=	-1,%r24,L(end2)
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	addib,>		-1,%r24,L(eloop)
+	vshd		%r29,%r22,%r20
+
+	.label	L(end1)
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r0,%r29,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+
+	.label	L(end2)
+	stws,ma		%r20,4(0,%r26)
+
+	.label	L(0004)
+	vshd		%r0,%r22,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/sub_n.asm b/mpn/pa32/hppa1_1/pa7100/sub_n.asm
new file mode 100644
index 000000000..14073cf19
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/sub_n.asm
@@ -0,0 +1,75 @@
+dnl  HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.  Optimized for the PA7100, where
+dnl  is runs at 4.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	r26
+C s1_ptr	r25
+C s2_ptr	r24
+C size		r23
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,<=	-5,%r23,L(rest)
+	 sub		%r20,%r19,%r28	C subtract first limbs ignoring cy
+
+	.label	L(loop)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	subb		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	subb		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	subb		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-4,%r23,L(loop)
+	subb		%r20,%r19,%r28
+
+	.label	L(rest)
+	addib,=		4,%r23,L(end)
+	nop
+
+	.label	L(eloop)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-1,%r23,L(eloop)
+	subb		%r20,%r19,%r28
+
+	.label	L(end)
+	stws		%r28,0(0,%r26)
+	addc		%r0,%r0,%r28
+	bv		0(%r2)
+	 subi		1,%r28,%r28
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/submul_1.asm b/mpn/pa32/hppa1_1/pa7100/submul_1.asm
new file mode 100644
index 000000000..f99079251
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/submul_1.asm
@@ -0,0 +1,198 @@
+dnl  HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`res_ptr',`%r26')
+define(`s1_ptr',`%r25')
+define(`size',`%r24')
+define(`s2_limb',`%r23')
+
+define(`cylimb',`%r28')
+define(`s0',`%r19')
+define(`s1',`%r20')
+define(`s2',`%r3')
+define(`s3',`%r4')
+define(`lo0',`%r21')
+define(`lo1',`%r5')
+define(`lo2',`%r6')
+define(`lo3',`%r7')
+define(`hi0',`%r22')
+define(`hi1',`%r23')				C safe to reuse
+define(`hi2',`%r29')
+define(`hi3',`%r1')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+C	.callinfo	frame=128,no_calls
+
+	ldo	128(%r30),%r30
+	stws	s2_limb,-16(%r30)
+	add	 %r0,%r0,cylimb			C clear cy and cylimb
+	addib,<	-4,size,L(few_limbs)
+	fldws	-16(%r30),%fr31R
+
+	ldo	-112(%r30),%r31
+	stw	%r3,-96(%r30)
+	stw	%r4,-92(%r30)
+	stw	%r5,-88(%r30)
+	stw	%r6,-84(%r30)
+	stw	%r7,-80(%r30)
+
+	bb,>=,n	 s1_ptr,29,L(0)
+
+	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r31)
+	ldws	-16(%r31),cylimb
+	ldws	-12(%r31),lo0
+	sub	 s0,lo0,s0
+	add	 s0,lo0,%r0			C invert cy
+	addib,< -1,size,L(few_limbs)
+	stws,ma	 s0,4(res_ptr)
+
+C start software pipeline ----------------------------------------------------
+	.label	L(0)
+	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	xmpyu	 %fr4L,%fr31R,%fr5
+	xmpyu	 %fr4R,%fr31R,%fr6
+	xmpyu	 %fr8L,%fr31R,%fr9
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	fstds	 %fr6,-8(%r31)
+	fstds	 %fr9,0(%r31)
+	fstds	 %fr10,8(%r31)
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	addc	 lo1,hi0,lo1
+	addc	 lo2,hi1,lo2
+	addc	 lo3,hi2,lo3
+
+	addib,<	 -4,size,L(end)
+	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
+C main loop ------------------------------------------------------------------
+	.label	L(loop)
+	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4L,%fr31R,%fr5
+	ldws	 4(res_ptr),s1
+	xmpyu	 %fr4R,%fr31R,%fr6
+	ldws	 8(res_ptr),s2
+	xmpyu	 %fr8L,%fr31R,%fr9
+	ldws	12(res_ptr),s3
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	sub	 s0,lo0,s0
+	fstds	 %fr6,-8(%r31)
+	subb	 s1,lo1,s1
+	fstds	 %fr9,0(%r31)
+	subb	 s2,lo2,s2
+	fstds	 %fr10,8(%r31)
+	subb	 s3,lo3,s3
+	subb	 %r0,%r0,lo0			C these two insns ...
+	add	 lo0,lo0,%r0			C ... just invert cy
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	stws,ma	 s0,4(res_ptr)
+	addc	 lo1,hi0,lo1
+	stws,ma	 s1,4(res_ptr)
+	addc	 lo2,hi1,lo2
+	stws,ma	 s2,4(res_ptr)
+	addc	 lo3,hi2,lo3
+	stws,ma	 s3,4(res_ptr)
+
+	addib,>= -4,size,L(loop)
+	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
+C finish software pipeline ---------------------------------------------------
+	.label	L(end)
+	ldws	 0(res_ptr),s0
+	ldws	 4(res_ptr),s1
+	ldws	 8(res_ptr),s2
+	ldws	12(res_ptr),s3
+
+	sub	 s0,lo0,s0
+	stws,ma	 s0,4(res_ptr)
+	subb	 s1,lo1,s1
+	stws,ma	 s1,4(res_ptr)
+	subb	 s2,lo2,s2
+	stws,ma	 s2,4(res_ptr)
+	subb	 s3,lo3,s3
+	stws,ma	 s3,4(res_ptr)
+	subb	 %r0,%r0,lo0			C these two insns ...
+	add	 lo0,lo0,%r0			C ... invert cy
+
+C restore callee-saves registers ---------------------------------------------
+	ldw	-96(%r30),%r3
+	ldw	-92(%r30),%r4
+	ldw	-88(%r30),%r5
+	ldw	-84(%r30),%r6
+	ldw	-80(%r30),%r7
+
+	.label	L(few_limbs)
+	addib,=,n 4,size,L(ret)
+
+	.label	L(loop2)
+	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r30)
+	ldws	-16(%r30),hi0
+	ldws	-12(%r30),lo0
+	addc	 lo0,cylimb,lo0
+	addc	 %r0,hi0,cylimb
+	sub	 s0,lo0,s0
+	add	 s0,lo0,%r0			C invert cy
+	stws,ma	 s0,4(res_ptr)
+	addib,<> -1,size,L(loop2)
+	nop
+
+	.label	L(ret)
+	addc	 %r0,cylimb,cylimb
+	bv	 0(%r2)
+	ldo	 -128(%r30),%r30
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/pa32/hppa1_1/sqr_diagonal.asm b/mpn/pa32/hppa1_1/sqr_diagonal.asm
new file mode 100644
index 000000000..53e4254b7
--- /dev/null
+++ b/mpn/pa32/hppa1_1/sqr_diagonal.asm
@@ -0,0 +1,51 @@
+dnl  HP-PA 1.1 32-bit mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C This code runs at 6 cycles/limb on the PA7100 and 2.5 cycles/limb on PA8x00.
+C 2-way unrolling wouldn't help the PA7100; it could however bring times down
+C to 2.0 cycles/limb for the PA8x00.
+
+C INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+	ldo		4(rp),rp
+	fldws,ma	4(up),%fr4r
+	addib,=		-1,n,L(exit)
+	xmpyu		%fr4r,%fr4r,%fr5
+
+	.label	L(loop)
+	fldws,ma	4(up),%fr4r
+	fstws		%fr5r,-4(rp)
+	fstws,ma	%fr5l,8(rp)
+	addib,<>	-1,n,L(loop)
+	xmpyu		%fr4r,%fr4r,%fr5
+
+	.label	L(exit)
+	fstws		%fr5r,-4(rp)
+	bv		0(%r2)
+	fstws		%fr5l,0(rp)
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/pa32/hppa1_1/submul_1.asm b/mpn/pa32/hppa1_1/submul_1.asm
new file mode 100644
index 000000000..3a6c756cc
--- /dev/null
+++ b/mpn/pa32/hppa1_1/submul_1.asm
@@ -0,0 +1,107 @@
+dnl  HP-PA 1.1 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	r26
+C s1_ptr	r25
+C size		r24
+C s2_limb	r23
+
+C This runs at 12 cycles/limb on a PA7000.  With the used instructions, it can
+C not become faster due to data cache contention after a store.  On the PA7100
+C it runs at 11 cycles/limb.
+
+C There are some ideas described in mul_1.asm that applies to this code too.
+
+C It seems possible to make this run as fast as mpn_addmul_1, if we use
+C 	sub,>>=	%r29,%r19,%r22
+C	addi	1,%r28,%r28
+C but that requires reworking the hairy software pipeline...
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+C	.callinfo	frame=64,no_calls
+
+	ldo		64(%r30),%r30
+	fldws,ma	4(%r25),%fr5
+	stw		%r23,-16(%r30)		C move s2_limb ...
+	addib,=		-1,%r24,L(just_one_limb)
+	 fldws		-16(%r30),%fr4		C ... into fr4
+	add		%r0,%r0,%r0		C clear carry
+	xmpyu		%fr4,%fr5,%fr6
+	fldws,ma	4(%r25),%fr7
+	fstds		%fr6,-16(%r30)
+	xmpyu		%fr4,%fr7,%fr8
+	ldw		-12(%r30),%r19		C least significant limb in product
+	ldw		-16(%r30),%r28
+
+	fstds		%fr8,-16(%r30)
+	addib,=		-1,%r24,L(end)
+	 ldw		-12(%r30),%r1
+
+C Main loop
+	.label	L(loop)
+	ldws		0(%r26),%r29
+	fldws,ma	4(%r25),%fr5
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0
+	stws,ma		%r22,4(%r26)
+	addc		%r28,%r1,%r19
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		-16(%r30),%r28
+	fstds		%fr6,-16(%r30)
+	addc		%r0,%r28,%r28
+	addib,<>	-1,%r24,L(loop)
+	 ldw		-12(%r30),%r1
+
+	.label	L(end)
+	ldw		0(%r26),%r29
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0
+	stws,ma		%r22,4(%r26)
+	addc		%r28,%r1,%r19
+	ldw		-16(%r30),%r28
+	ldws		0(%r26),%r29
+	addc		%r0,%r28,%r28
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0
+	stws,ma		%r22,4(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+	.label	L(just_one_limb)
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		0(%r26),%r29
+	fstds		%fr6,-16(%r30)
+	ldw		-12(%r30),%r1
+	ldw		-16(%r30),%r28
+	sub		%r29,%r1,%r22
+	add		%r22,%r1,%r0
+	stw		%r22,0(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/udiv_qrnnd.asm b/mpn/pa32/hppa1_1/udiv_qrnnd.asm
new file mode 100644
index 000000000..0fb302e53
--- /dev/null
+++ b/mpn/pa32/hppa1_1/udiv_qrnnd.asm
@@ -0,0 +1,96 @@
+dnl  HP-PA  __udiv_qrnnd division support, used from longlong.h.
+dnl  This version runs fast on PA 7000 and later.
+
+dnl  Copyright 1993, 1994, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr	gr26
+C n1		gr25
+C n0		gr24
+C d		gr23
+
+C This file has caused a lot of trouble, since it demands PIC reference to
+C static data, which triggers bugs in gas (at least version 2.7 through
+C 2.11.2).  When the bug is triggered, many bogus relocs are generated.  The
+C current solution is to stuff data right into the code, and refer it using
+C absolute offsets.  Fragile to be sure, but nothing else seems to work.
+
+ASM_START()
+ifdef(`PIC',`',
+`	RODATA
+	INT64(L(0000), 0x43f00000, 0x0)		C 2^64
+')
+
+PROLOGUE(mpn_udiv_qrnnd)
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldo		64(%r30),%r30
+
+	stws		%r25,-16(0,%r30)	C n_hi
+	stws		%r24,-12(0,%r30)	C n_lo
+
+ifdef(`PIC',
+`	bl		.+20,%r31
+	dep		%r0,31,2,%r31
+	.word	0x0				C padding for alignment
+	.word	0x43f00000, 0x0			C 2^64
+	ldo		4(%r31),%r31',
+`	ldil		`L'%L(0000),%r31
+	ldo		R%L(0000)(%r31),%r31')
+
+	fldds		-16(0,%r30),%fr5
+	stws		%r23,-12(0,%r30)
+	comib,<=	0,%r25,L(1)
+	fcnvxf,dbl,dbl	%fr5,%fr5
+	fldds		0(0,%r31),%fr4
+	fadd,dbl	%fr4,%fr5,%fr5
+
+	.label	L(1)
+	fcpy,sgl	%fr0,%fr6L
+	fldws		-12(0,%r30),%fr6R
+	fcnvxf,dbl,dbl	%fr6,%fr4
+
+	fdiv,dbl	%fr5,%fr4,%fr5
+
+	fcnvfx,dbl,dbl	%fr5,%fr4
+	fstws		%fr4R,-16(%r30)
+	xmpyu		%fr4R,%fr6R,%fr6
+	ldws		-16(%r30),%r28
+	fstds		%fr6,-16(0,%r30)
+	ldws		-12(0,%r30),%r21
+	ldws		-16(0,%r30),%r20
+	sub		%r24,%r21,%r22
+	subb		%r25,%r20,%r20
+	comib,=		0,%r20,L(2)
+	ldo		-64(%r30),%r30
+
+	add		%r22,%r23,%r22
+	ldo		-1(%r28),%r28
+
+	.label	L(2)
+	bv		0(%r2)
+	stws		%r22,0(0,%r26)
+	.exit
+	.procend
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/pa32/hppa1_1/umul.asm b/mpn/pa32/hppa1_1/umul.asm
new file mode 100644
index 000000000..d8bc93317
--- /dev/null
+++ b/mpn/pa32/hppa1_1/umul.asm
@@ -0,0 +1,38 @@
+dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+C	.callinfo frame=64,no_calls
+
+	ldo 64(%r30),%r30
+	stw %r25,-16(0,%r30)
+	fldws -16(0,%r30),%fr22R
+	stw %r24,-16(0,%r30)
+	fldws -16(0,%r30),%fr22L
+	xmpyu %fr22R,%fr22L,%fr22
+	fstds %fr22,-16(0,%r30)
+	ldw -16(0,%r30),%r28
+	ldw -12(0,%r30),%r29
+	stw %r29,0(0,%r26)
+	bv 0(%r2)
+	ldo -64(%r30),%r30
+EPILOGUE()
diff --git a/mpn/pa32/hppa2_0/add_n.asm b/mpn/pa32/hppa2_0/add_n.asm
new file mode 100644
index 000000000..fc0f14356
--- /dev/null
+++ b/mpn/pa32/hppa2_0/add_n.asm
@@ -0,0 +1,98 @@
+dnl  HP-PA 2.0 32-bit mpn_add_n -- Add two limb vectors of the same length > 0
+dnl  and store sum in a third limb vector.
+
+dnl  Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	gr26
+C s1_ptr	gr25
+C s2_ptr	gr24
+C size		gr23
+
+C This runs at 2 cycles/limb on PA8000.
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	sub		%r0,%r23,%r22
+	zdep		%r22,30,3,%r28		C r28 = 2 * (-n & 7)
+	zdep		%r22,29,3,%r22		C r22 = 4 * (-n & 7)
+	sub		%r25,%r22,%r25		C offset s1_ptr
+	sub		%r24,%r22,%r24		C offset s2_ptr
+	sub		%r26,%r22,%r26		C offset res_ptr
+	blr		%r28,%r0		C branch into loop
+	add		%r0,%r0,%r0		C reset carry
+
+	.label	L(loop)
+	ldw		0(%r25),%r20
+	ldw		0(%r24),%r31
+	addc		%r20,%r31,%r20
+	stw		%r20,0(%r26)
+
+	.label	L(7)
+	ldw		4(%r25),%r21
+	ldw		4(%r24),%r19
+	addc		%r21,%r19,%r21
+	stw		%r21,4(%r26)
+
+	.label	L(6)
+	ldw		8(%r25),%r20
+	ldw		8(%r24),%r31
+	addc		%r20,%r31,%r20
+	stw		%r20,8(%r26)
+
+	.label	L(5)
+	ldw		12(%r25),%r21
+	ldw		12(%r24),%r19
+	addc		%r21,%r19,%r21
+	stw		%r21,12(%r26)
+
+	.label	L(4)
+	ldw		16(%r25),%r20
+	ldw		16(%r24),%r31
+	addc		%r20,%r31,%r20
+	stw		%r20,16(%r26)
+
+	.label	L(3)
+	ldw		20(%r25),%r21
+	ldw		20(%r24),%r19
+	addc		%r21,%r19,%r21
+	stw		%r21,20(%r26)
+
+	.label	L(2)
+	ldw		24(%r25),%r20
+	ldw		24(%r24),%r31
+	addc		%r20,%r31,%r20
+	stw		%r20,24(%r26)
+
+	.label	L(1)
+	ldw		28(%r25),%r21
+	ldo		32(%r25),%r25
+	ldw		28(%r24),%r19
+	addc		%r21,%r19,%r21
+	stw		%r21,28(%r26)
+	ldo		32(%r24),%r24
+	addib,>		-8,%r23,L(loop)
+	ldo		32(%r26),%r26
+
+	bv		(%r2)
+	addc		%r0,%r0,%r28
+EPILOGUE()
diff --git a/mpn/pa32/hppa2_0/gmp-mparam.h b/mpn/pa32/hppa2_0/gmp-mparam.h
new file mode 100644
index 000000000..f2306773c
--- /dev/null
+++ b/mpn/pa32/hppa2_0/gmp-mparam.h
@@ -0,0 +1,57 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2001-02-18, gcc 2.95 */
+
+#define MUL_KARATSUBA_THRESHOLD       16
+#define MUL_TOOM3_THRESHOLD          129
+
+#define SQR_BASECASE_THRESHOLD         6
+#define SQR_KARATSUBA_THRESHOLD       48
+#define SQR_TOOM3_THRESHOLD          153
+
+#define DIV_SB_PREINV_THRESHOLD            6
+#define DIV_DC_THRESHOLD                 102
+#define POWM_THRESHOLD               166
+
+#define GCD_ACCEL_THRESHOLD            4
+#define GCDEXT_THRESHOLD               0
+
+#define DIVREM_1_NORM_THRESHOLD        4
+#define DIVREM_1_UNNORM_THRESHOLD      6
+#define MOD_1_NORM_THRESHOLD           4
+#define MOD_1_UNNORM_THRESHOLD         6
+#define USE_PREINV_MOD_1               0
+#define DIVREM_2_THRESHOLD             0
+#define DIVEXACT_1_THRESHOLD           0
+#define MODEXACT_1_ODD_THRESHOLD       0
+
+#define MUL_FFT_TABLE  { 656, 928, 1920, 3584, 14336, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD       584
+#define MUL_FFT_THRESHOLD           3840
+
+#define SQR_FFT_TABLE  { 656, 928, 1920, 3584, 14336, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD       616
+#define SQR_FFT_THRESHOLD           3840
diff --git a/mpn/pa32/hppa2_0/sqr_diagonal.asm b/mpn/pa32/hppa2_0/sqr_diagonal.asm
new file mode 100644
index 000000000..463aa45f4
--- /dev/null
+++ b/mpn/pa32/hppa2_0/sqr_diagonal.asm
@@ -0,0 +1,103 @@
+dnl  HP-PA 32-bit mpn_sqr_diagonal optimized for the PA8x00.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C This code runs at 6 cycles/limb on the PA7100 and 2 cycles/limb on PA8x00.
+C The 2-way unrolling is actually not helping the PA7100.
+
+C INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+
+	fldws,ma	4(up),%fr4r
+	addib,=		-1,n,L(end1)
+	ldo		4(rp),rp
+
+	fldws,ma	4(up),%fr6r
+	addib,=		-1,n,L(end2)
+	xmpyu		%fr4r,%fr4r,%fr5
+
+	fldws,ma	4(up),%fr4r
+	addib,=		-1,n,L(end3)
+	xmpyu		%fr6r,%fr6r,%fr7
+
+
+	.label	L(loop)
+	fldws,ma	4(up),%fr6r
+	fstws		%fr5r,-4(rp)
+	fstws,ma	%fr5l,8(rp)
+	addib,=		-1,n,L(exite)
+	xmpyu		%fr4r,%fr4r,%fr5
+	fldws,ma	4(up),%fr4r
+	fstws		%fr7r,-4(rp)
+	fstws,ma	%fr7l,8(rp)
+	addib,<>	-1,n,L(loop)
+	xmpyu		%fr6r,%fr6r,%fr7
+
+	.label	L(exito)
+	fstws		%fr5r,-4(rp)
+	fstws		%fr5l,0(rp)
+	xmpyu		%fr4r,%fr4r,%fr5
+	fstws		%fr7r,4(rp)
+	fstws		%fr7l,8(rp)
+	fstws,mb	%fr5r,12(rp)
+	bv		0(%r2)
+	fstws		%fr5l,4(rp)
+
+	.label	L(exite)
+	fstws		%fr7r,-4(rp)
+	fstws		%fr7l,0(rp)
+	xmpyu		%fr6r,%fr6r,%fr7
+	fstws		%fr5r,4(rp)
+	fstws		%fr5l,8(rp)
+	fstws,mb	%fr7r,12(rp)
+	bv		0(%r2)
+	fstws		%fr7l,4(rp)
+
+	.label	L(end1)
+	xmpyu		%fr4r,%fr4r,%fr5
+	fstws		%fr5r,-4(rp)
+	bv		0(%r2)
+	fstws,ma	%fr5l,8(rp)
+
+	.label	L(end2)
+	xmpyu		%fr6r,%fr6r,%fr7
+	fstws		%fr5r,-4(rp)
+	fstws		%fr5l,0(rp)
+	fstws		%fr7r,4(rp)
+	bv		0(%r2)
+	fstws		%fr7l,8(rp)
+
+	.label	L(end3)
+	fstws		%fr5r,-4(rp)
+	fstws		%fr5l,0(rp)
+	xmpyu		%fr4r,%fr4r,%fr5
+	fstws		%fr7r,4(rp)
+	fstws		%fr7l,8(rp)
+	fstws,mb	%fr5r,12(rp)
+	bv		0(%r2)
+	fstws		%fr5l,4(rp)
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/pa32/hppa2_0/sub_n.asm b/mpn/pa32/hppa2_0/sub_n.asm
new file mode 100644
index 000000000..d6b930dee
--- /dev/null
+++ b/mpn/pa32/hppa2_0/sub_n.asm
@@ -0,0 +1,98 @@
+dnl  HP-PA 2.0 32-bit mpn_sub_n -- Subtract two limb vectors of the same
+dnl  length > 0 and store difference in a third limb vector.
+
+dnl  Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	gr26
+C s1_ptr	gr25
+C s2_ptr	gr24
+C size		gr23
+
+C This runs at 2 cycles/limb on PA8000.
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	sub		%r0,%r23,%r22
+	zdep		%r22,30,3,%r28		C r28 = 2 * (-n & 7)
+	zdep		%r22,29,3,%r22		C r22 = 4 * (-n & 7)
+	sub		%r25,%r22,%r25		C offset s1_ptr
+	sub		%r24,%r22,%r24		C offset s2_ptr
+	blr		%r28,%r0		C branch into loop
+	sub		%r26,%r22,%r26		C offset res_ptr and set carry
+
+	.label	L(loop)
+	ldw		0(%r25),%r20
+	ldw		0(%r24),%r31
+	subb		%r20,%r31,%r20
+	stw		%r20,0(%r26)
+
+	.label	L(7)
+	ldw		4(%r25),%r21
+	ldw		4(%r24),%r19
+	subb		%r21,%r19,%r21
+	stw		%r21,4(%r26)
+
+	.label	L(6)
+	ldw		8(%r25),%r20
+	ldw		8(%r24),%r31
+	subb		%r20,%r31,%r20
+	stw		%r20,8(%r26)
+
+	.label	L(5)
+	ldw		12(%r25),%r21
+	ldw		12(%r24),%r19
+	subb		%r21,%r19,%r21
+	stw		%r21,12(%r26)
+
+	.label	L(4)
+	ldw		16(%r25),%r20
+	ldw		16(%r24),%r31
+	subb		%r20,%r31,%r20
+	stw		%r20,16(%r26)
+
+	.label	L(3)
+	ldw		20(%r25),%r21
+	ldw		20(%r24),%r19
+	subb		%r21,%r19,%r21
+	stw		%r21,20(%r26)
+
+	.label	L(2)
+	ldw		24(%r25),%r20
+	ldw		24(%r24),%r31
+	subb		%r20,%r31,%r20
+	stw		%r20,24(%r26)
+
+	.label	L(1)
+	ldw		28(%r25),%r21
+	ldo		32(%r25),%r25
+	ldw		28(%r24),%r19
+	subb		%r21,%r19,%r21
+	stw		%r21,28(%r26)
+	ldo		32(%r24),%r24
+	addib,>		-8,%r23,L(loop)
+	ldo		32(%r26),%r26
+
+	addc		%r0,%r0,%r28
+	bv		(%r2)
+	subi		1,%r28,%r28
+EPILOGUE()
diff --git a/mpn/pa32/lshift.asm b/mpn/pa32/lshift.asm
new file mode 100644
index 000000000..393f4dc0c
--- /dev/null
+++ b/mpn/pa32/lshift.asm
@@ -0,0 +1,66 @@
+dnl  HP-PA  mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	gr26
+C s_ptr		gr25
+C size		gr24
+C cnt		gr23
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	sh2add		%r24,%r25,%r25
+	sh2add		%r24,%r26,%r26
+	ldws,mb		-4(0,%r25),%r22
+	subi		32,%r23,%r1
+	mtsar		%r1
+	addib,=		-1,%r24,L(0004)
+	vshd		%r0,%r22,%r28		C compute carry out limb
+	ldws,mb		-4(0,%r25),%r29
+	addib,=		-1,%r24,L(0002)
+	vshd		%r22,%r29,%r20
+
+	.label	L(loop)
+	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	addib,=		-1,%r24,L(0003)
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	addib,<>	-1,%r24,L(loop)
+	vshd		%r22,%r29,%r20
+
+	.label	L(0002)
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+
+	.label	L(0003)
+	stws,mb		%r20,-4(0,%r26)
+
+	.label	L(0004)
+	vshd		%r22,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+EPILOGUE()
diff --git a/mpn/pa32/rshift.asm b/mpn/pa32/rshift.asm
new file mode 100644
index 000000000..f4efdab42
--- /dev/null
+++ b/mpn/pa32/rshift.asm
@@ -0,0 +1,63 @@
+dnl  HP-PA  mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	gr26
+C s_ptr		gr25
+C size		gr24
+C cnt		gr23
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	ldws,ma		4(0,%r25),%r22
+	mtsar		%r23
+	addib,=		-1,%r24,L(0004)
+	vshd		%r22,%r0,%r28		C compute carry out limb
+	ldws,ma		4(0,%r25),%r29
+	addib,=		-1,%r24,L(0002)
+	vshd		%r29,%r22,%r20
+
+	.label	L(loop)
+	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	addib,=		-1,%r24,L(0003)
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	addib,<>	-1,%r24,L(loop)
+	vshd		%r29,%r22,%r20
+
+	.label	L(0002)
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r0,%r29,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+
+	.label	L(0003)
+	stws,ma		%r20,4(0,%r26)
+
+	.label	L(0004)
+	vshd		%r0,%r22,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+EPILOGUE()
diff --git a/mpn/pa32/sub_n.asm b/mpn/pa32/sub_n.asm
new file mode 100644
index 000000000..33a15e468
--- /dev/null
+++ b/mpn/pa32/sub_n.asm
@@ -0,0 +1,55 @@
+dnl  HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr	gr26
+C s1_ptr	gr25
+C s2_ptr	gr24
+C size		gr23
+
+C One might want to unroll this as for other processors, but it turns out that
+C the data cache contention after a store makes such unrolling useless.  We
+C can't come under 5 cycles/limb anyway.
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,=		-1,%r23,L(end)	C check for (SIZE == 1)
+	 sub		%r20,%r19,%r28	C subtract first limbs ignoring cy
+
+	.label	L(loop)
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,<>	-1,%r23,L(loop)
+	 subb		%r20,%r19,%r28
+
+	.label	L(end)
+	stws		%r28,0(0,%r26)
+	addc		%r0,%r0,%r28
+	bv		0(%r2)
+	 subi		1,%r28,%r28
+EPILOGUE()
diff --git a/mpn/pa32/udiv_qrnnd.asm b/mpn/pa32/udiv_qrnnd.asm
new file mode 100644
index 000000000..ff4c8a47e
--- /dev/null
+++ b/mpn/pa32/udiv_qrnnd.asm
@@ -0,0 +1,282 @@
+dnl  HP-PA  __udiv_qrnnd division support, used from longlong.h.
+dnl  This version runs fast on pre-PA7000 CPUs.
+
+dnl  Copyright 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl  MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr	gr26
+C n1		gr25
+C n0		gr24
+C d		gr23
+
+C The code size is a bit excessive.  We could merge the last two ds;addc
+C sequences by simply moving the "bb,< Odd" instruction down.  The only
+C trouble is the FFFFFFFF code that would need some hacking.
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+	comb,<		%r23,0,L(largedivisor)
+	 sub		%r0,%r23,%r1		C clear cy as side-effect
+	ds		%r0,%r1,%r0
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r28
+	ds		%r25,%r23,%r25
+	comclr,>=	%r25,%r0,%r0
+	addl		%r25,%r23,%r25
+	stws		%r25,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r28,%r28,%r28
+
+	.label	L(largedivisor)
+	extru		%r24,31,1,%r19		C r19 = n0 & 1
+	bb,<		%r23,31,L(odd)
+	 extru		%r23,30,31,%r22		C r22 = d >> 1
+	shd		%r25,%r24,1,%r24	C r24 = new n0
+	extru		%r25,30,31,%r25		C r25 = new n1
+	sub		%r0,%r22,%r21
+	ds		%r0,%r21,%r0
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	comclr,>=	%r25,%r0,%r0
+	addl		%r25,%r22,%r25
+	sh1addl		%r25,%r19,%r25
+	stws		%r25,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r24,%r24,%r28
+
+	.label	L(odd)
+	addib,sv,n	1,%r22,L(FFFFFFFF)	C r22 = (d / 2 + 1)
+	shd		%r25,%r24,1,%r24	C r24 = new n0
+	extru		%r25,30,31,%r25		C r25 = new n1
+	sub		%r0,%r22,%r21
+	ds		%r0,%r21,%r0
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r28
+	comclr,>=	%r25,%r0,%r0
+	addl		%r25,%r22,%r25
+	sh1addl		%r25,%r19,%r25
+C We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
+	add,nuv		%r28,%r25,%r25
+	addl		%r25,%r1,%r25
+	addc		%r0,%r28,%r28
+	sub,<<		%r25,%r23,%r0
+	addl		%r25,%r1,%r25
+	stws		%r25,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r28,%r28
+
+C This is just a special case of the code above.
+C We come here when d == 0xFFFFFFFF
+	.label	L(FFFFFFFF)
+	add,uv		%r25,%r24,%r24
+	sub,<<		%r24,%r23,%r0
+	ldo		1(%r24),%r24
+	stws		%r24,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r25,%r28
+EPILOGUE()
author	tege <tege@gmplib.org>	2002-03-26 17:30:50 +0100
committer	tege <tege@gmplib.org>	2002-03-26 17:30:50 +0100
commit	59ccffe8a22a0c991f9c44a8c17db500f9b08a51 (patch)
tree	f2f02ca330e0de3bd9b186f54071a1c8f1b195a5 /mpn/pa32
parent	337d7796e7aadc1055f2e2e4c89f5849f6a1f994 (diff)
download	gmp-59ccffe8a22a0c991f9c44a8c17db500f9b08a51.tar.gz