summaryrefslogtreecommitdiff
path: root/ghc/rts/gmp/mpn
diff options
context:
space:
mode:
authorsimonm <unknown>1998-06-05 14:37:58 +0000
committersimonm <unknown>1998-06-05 14:37:58 +0000
commit739d408b7f5b024bda531397d48315782c9f51d2 (patch)
tree8edbfbf55a34310d263e7da926652f88e2a93243 /ghc/rts/gmp/mpn
parent798c2ac0ef1ba9ee9711275a964af7a2575c4d52 (diff)
downloadhaskell-739d408b7f5b024bda531397d48315782c9f51d2.tar.gz
[project @ 1998-06-05 14:37:58 by simonm]
Initial revision
Diffstat (limited to 'ghc/rts/gmp/mpn')
-rw-r--r--ghc/rts/gmp/mpn/x86/pentium/add_n.S130
-rw-r--r--ghc/rts/gmp/mpn/x86/pentium/addmul_1.S83
-rw-r--r--ghc/rts/gmp/mpn/x86/pentium/lshift.S217
-rw-r--r--ghc/rts/gmp/mpn/x86/pentium/mul_1.S79
-rw-r--r--ghc/rts/gmp/mpn/x86/pentium/rshift.S217
-rw-r--r--ghc/rts/gmp/mpn/x86/pentium/sub_n.S130
-rw-r--r--ghc/rts/gmp/mpn/x86/pentium/submul_1.S83
-rw-r--r--ghc/rts/gmp/mpn/x86/submul_1.S76
-rw-r--r--ghc/rts/gmp/mpn/x86/syntax.h62
9 files changed, 1077 insertions, 0 deletions
diff --git a/ghc/rts/gmp/mpn/x86/pentium/add_n.S b/ghc/rts/gmp/mpn/x86/pentium/add_n.S
new file mode 100644
index 0000000000..ac6f2819b2
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/pentium/add_n.S
@@ -0,0 +1,130 @@
+/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+ sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr (sp + 12)
+ size (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
+ movl 32(%esp),%ecx /* size */
+
+ movl (%ebp),%ebx
+
+ decl %ecx
+ movl %ecx,%edx
+ shrl $3,%ecx
+ andl $7,%edx
+ testl %ecx,%ecx /* zero carry flag */
+ jz Lend
+ pushl %edx
+
+ ALIGN (3)
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L1: movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ adcl %ebx,%eax
+ movl 12(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ adcl %ebx,%eax
+ movl 20(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %ebx,%eax
+ movl 28(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
+ decl %ecx
+ jnz Loop
+
+ popl %edx
+Lend:
+ decl %edx /* test %edx w/o clobbering carry */
+ js Lend2
+ incl %edx
+Loop2:
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
+ jnz Loop2
+Lend2:
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl %eax,(%edi)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/pentium/addmul_1.S b/ghc/rts/gmp/mpn/x86/pentium/addmul_1.S
new file mode 100644
index 0000000000..7cfa5db687
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/pentium/addmul_1.S
@@ -0,0 +1,83 @@
+/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ the result to a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+ INSN1(mul,l ,R(s2_limb))
+
+ INSN2(add,l ,R(eax),R(ebx))
+ INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
+
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(add,l ,R(ebx),R(eax))
+
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
+
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
+
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/pentium/lshift.S b/ghc/rts/gmp/mpn/x86/pentium/lshift.S
new file mode 100644
index 0000000000..b298983563
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/pentium/lshift.S
@@ -0,0 +1,217 @@
+/* Pentium optimized __mpn_lshift --
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ size (sp + 12)
+ cnt (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_lshift)
+C_SYMBOL_NAME(__mpn_lshift:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s_ptr */
+ movl 28(%esp),%ebp /* size */
+ movl 32(%esp),%ecx /* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions. */
+ cmp $1,%ecx
+ jne Lnormal
+ leal 4(%esi),%eax
+ cmpl %edi,%eax
+ jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
+ leal (%esi,%ebp,4),%eax
+ cmpl %eax,%edi
+ jnc Lspecial /* jump if res_ptr >= s_ptr + size */
+
+Lnormal:
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+ xorl %eax,%eax
+ shldl %cl,%edx,%eax /* compute carry limb */
+ pushl %eax /* push carry limb onto stack */
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz Lend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+Loop: movl -28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ shldl %cl,%eax,%ebx
+ shldl %cl,%edx,%eax
+ movl %ebx,(%edi)
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ shldl %cl,%ebx,%edx
+ shldl %cl,%eax,%ebx
+ movl %edx,-8(%edi)
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ shldl %cl,%edx,%eax
+ shldl %cl,%ebx,%edx
+ movl %eax,-16(%edi)
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ shldl %cl,%eax,%ebx
+ shldl %cl,%edx,%eax
+ movl %ebx,-24(%edi)
+ movl %eax,-28(%edi)
+
+ subl $32,%esi
+ subl $32,%edi
+ decl %ebp
+ jnz Loop
+
+Lend: popl %ebp
+ andl $7,%ebp
+ jz Lend2
+Loop2: movl (%esi),%eax
+ shldl %cl,%eax,%edx
+ movl %edx,(%edi)
+ movl %eax,%edx
+ subl $4,%esi
+ subl $4,%edi
+ decl %ebp
+ jnz Loop2
+
+Lend2: shll %cl,%edx /* compute least significant limb */
+ movl %edx,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* We loop from least significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination.
+*/
+
+Lspecial:
+ movl (%esi),%edx
+ addl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ addl %edx,%edx
+ incl %ebp
+ decl %ebp
+ jz LLend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+LLoop: movl 28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,(%edi)
+ adcl %edx,%edx
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ adcl %ebx,%ebx
+ movl %edx,8(%edi)
+ adcl %eax,%eax
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ adcl %edx,%edx
+ movl %eax,16(%edi)
+ adcl %ebx,%ebx
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,24(%edi)
+ adcl %edx,%edx
+ movl %eax,28(%edi)
+
+ leal 32(%esi),%esi /* use leal not to clobber carry */
+ leal 32(%edi),%edi
+ decl %ebp
+ jnz LLoop
+
+LLend: popl %ebp
+ sbbl %eax,%eax /* save carry in %eax */
+ andl $7,%ebp
+ jz LLend2
+ addl %eax,%eax /* restore carry from eax */
+LLoop2: movl %edx,%ebx
+ movl (%esi),%edx
+ adcl %edx,%edx
+ movl %ebx,(%edi)
+
+ leal 4(%esi),%esi /* use leal not to clobber carry */
+ leal 4(%edi),%edi
+ decl %ebp
+ jnz LLoop2
+
+ jmp LL1
+LLend2: addl %eax,%eax /* restore carry from eax */
+LL1: movl %edx,(%edi) /* store last limb */
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/pentium/mul_1.S b/ghc/rts/gmp/mpn/x86/pentium/mul_1.S
new file mode 100644
index 0000000000..4ac3050a61
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/pentium/mul_1.S
@@ -0,0 +1,79 @@
+/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
+ the result in a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+ INSN1(mul,l ,R(s2_limb))
+
+ INSN2(add,l ,R(ebx),R(eax))
+
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
+
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
+
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/pentium/rshift.S b/ghc/rts/gmp/mpn/x86/pentium/rshift.S
new file mode 100644
index 0000000000..38398edb13
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/pentium/rshift.S
@@ -0,0 +1,217 @@
+/* Pentium optimized __mpn_rshift --
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ size (sp + 12)
+ cnt (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_rshift)
+C_SYMBOL_NAME(__mpn_rshift:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s_ptr */
+ movl 28(%esp),%ebp /* size */
+ movl 32(%esp),%ecx /* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions. */
+ cmp $1,%ecx
+ jne Lnormal
+ leal 4(%edi),%eax
+ cmpl %esi,%eax
+ jnc Lspecial /* jump if res_ptr + 1 >= s_ptr */
+ leal (%edi,%ebp,4),%eax
+ cmpl %eax,%esi
+ jnc Lspecial /* jump if s_ptr >= res_ptr + size */
+
+Lnormal:
+ movl (%esi),%edx
+ addl $4,%esi
+ xorl %eax,%eax
+ shrdl %cl,%edx,%eax /* compute carry limb */
+ pushl %eax /* push carry limb onto stack */
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz Lend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ shrdl %cl,%eax,%ebx
+ shrdl %cl,%edx,%eax
+ movl %ebx,(%edi)
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ shrdl %cl,%ebx,%edx
+ shrdl %cl,%eax,%ebx
+ movl %edx,8(%edi)
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ shrdl %cl,%edx,%eax
+ shrdl %cl,%ebx,%edx
+ movl %eax,16(%edi)
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ shrdl %cl,%eax,%ebx
+ shrdl %cl,%edx,%eax
+ movl %ebx,24(%edi)
+ movl %eax,28(%edi)
+
+ addl $32,%esi
+ addl $32,%edi
+ decl %ebp
+ jnz Loop
+
+Lend: popl %ebp
+ andl $7,%ebp
+ jz Lend2
+Loop2: movl (%esi),%eax
+ shrdl %cl,%eax,%edx /* compute result limb */
+ movl %edx,(%edi)
+ movl %eax,%edx
+ addl $4,%esi
+ addl $4,%edi
+ decl %ebp
+ jnz Loop2
+
+Lend2: shrl %cl,%edx /* compute most significant limb */
+ movl %edx,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* We loop from least significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination.
+*/
+
+Lspecial:
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ shrl $1,%edx
+ incl %ebp
+ decl %ebp
+ jz LLend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+LLoop: movl -28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ rcrl $1,%eax
+ movl %ebx,(%edi)
+ rcrl $1,%edx
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ rcrl $1,%ebx
+ movl %edx,-8(%edi)
+ rcrl $1,%eax
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ rcrl $1,%edx
+ movl %eax,-16(%edi)
+ rcrl $1,%ebx
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ rcrl $1,%eax
+ movl %ebx,-24(%edi)
+ rcrl $1,%edx
+ movl %eax,-28(%edi)
+
+ leal -32(%esi),%esi /* use leal not to clobber carry */
+ leal -32(%edi),%edi
+ decl %ebp
+ jnz LLoop
+
+LLend: popl %ebp
+ sbbl %eax,%eax /* save carry in %eax */
+ andl $7,%ebp
+ jz LLend2
+ addl %eax,%eax /* restore carry from eax */
+LLoop2: movl %edx,%ebx
+ movl (%esi),%edx
+ rcrl $1,%edx
+ movl %ebx,(%edi)
+
+ leal -4(%esi),%esi /* use leal not to clobber carry */
+ leal -4(%edi),%edi
+ decl %ebp
+ jnz LLoop2
+
+ jmp LL1
+LLend2: addl %eax,%eax /* restore carry from eax */
+LL1: movl %edx,(%edi) /* store last limb */
+
+ movl $0,%eax
+ rcrl $1,%eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/pentium/sub_n.S b/ghc/rts/gmp/mpn/x86/pentium/sub_n.S
new file mode 100644
index 0000000000..d1a2bc0840
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/pentium/sub_n.S
@@ -0,0 +1,130 @@
+/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+ and store difference in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr (sp + 12)
+ size (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
+ movl 32(%esp),%ecx /* size */
+
+ movl (%ebp),%ebx
+
+ decl %ecx
+ movl %ecx,%edx
+ shrl $3,%ecx
+ andl $7,%edx
+ testl %ecx,%ecx /* zero carry flag */
+ jz Lend
+ pushl %edx
+
+ ALIGN (3)
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L1: movl (%esi),%eax
+ movl 4(%esi),%edx
+ sbbl %ebx,%eax
+ movl 4(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ sbbl %ebx,%eax
+ movl 12(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ sbbl %ebx,%eax
+ movl 20(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ sbbl %ebx,%eax
+ movl 28(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
+ decl %ecx
+ jnz Loop
+
+ popl %edx
+Lend:
+ decl %edx /* test %edx w/o clobbering carry */
+ js Lend2
+ incl %edx
+Loop2:
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
+ jnz Loop2
+Lend2:
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl %eax,(%edi)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/pentium/submul_1.S b/ghc/rts/gmp/mpn/x86/pentium/submul_1.S
new file mode 100644
index 0000000000..adf2d63e68
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/pentium/submul_1.S
@@ -0,0 +1,83 @@
+/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+ the result from a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+ INSN1(mul,l ,R(s2_limb))
+
+ INSN2(add,l ,R(eax),R(ebx))
+ INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
+
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(sub,l ,R(ebx),R(eax))
+
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
+
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
+
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/submul_1.S b/ghc/rts/gmp/mpn/x86/submul_1.S
new file mode 100644
index 0000000000..730e732045
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/submul_1.S
@@ -0,0 +1,76 @@
+/* i80386 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+ the result from a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+Loop:
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+ INSN1(mul,l ,R(s2_limb))
+ INSN2(add,l ,R(eax),R(ebx))
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(sub,l ,MEM_INDEX(res_ptr,size,4),R(eax))
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(mov,l ,R(ebx),R(edx))
+
+ INSN1(inc,l ,R(size))
+ INSN1(jnz, ,Loop)
+ INSN2(mov,l ,R(eax),R(ebx))
+
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
diff --git a/ghc/rts/gmp/mpn/x86/syntax.h b/ghc/rts/gmp/mpn/x86/syntax.h
new file mode 100644
index 0000000000..c53c73c03f
--- /dev/null
+++ b/ghc/rts/gmp/mpn/x86/syntax.h
@@ -0,0 +1,62 @@
+/* asm.h -- Definitions for x86 syntax variations.
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#undef ALIGN
+
+#if defined (BSD_SYNTAX) || defined (ELF_SYNTAX)
+#define R(r) %r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)displacement(R(base))
+#define MEM_INDEX(base,index,size)(R(base),R(index),size)
+#ifdef __STDC__
+#define INSN1(mnemonic,size_suffix,dst)mnemonic##size_suffix dst
+#define INSN2(mnemonic,size_suffix,dst,src)mnemonic##size_suffix src,dst
+#else
+#define INSN1(mnemonic,size_suffix,dst)mnemonic/**/size_suffix dst
+#define INSN2(mnemonic,size_suffix,dst,src)mnemonic/**/size_suffix src,dst
+#endif
+#define TEXT .text
+#if defined (BSD_SYNTAX)
+#define ALIGN(log) .align log
+#endif
+#if defined (ELF_SYNTAX)
+#define ALIGN(log) .align 1<<(log)
+#endif
+#define GLOBL .globl
+#endif
+
+#ifdef INTEL_SYNTAX
+#define R(r) r
+#define MEM(base)[base]
+#define MEM_DISP(base,displacement)[base+(displacement)]
+#define MEM_INDEX(base,index,size)[base+index*size]
+#define INSN1(mnemonic,size_suffix,dst)mnemonic dst
+#define INSN2(mnemonic,size_suffix,dst,src)mnemonic dst,src
+#define TEXT .text
+#define ALIGN(log) .align log
+#define GLOBL .globl
+#endif
+
+#ifdef BROKEN_ALIGN
+#undef ALIGN
+#define ALIGN(log) .align log,0x90
+#endif