diff options
author | simonm <unknown> | 1998-06-05 14:37:58 +0000 |
---|---|---|
committer | simonm <unknown> | 1998-06-05 14:37:58 +0000 |
commit | 739d408b7f5b024bda531397d48315782c9f51d2 (patch) | |
tree | 8edbfbf55a34310d263e7da926652f88e2a93243 /ghc/rts/gmp/mpn | |
parent | 798c2ac0ef1ba9ee9711275a964af7a2575c4d52 (diff) | |
download | haskell-739d408b7f5b024bda531397d48315782c9f51d2.tar.gz |
[project @ 1998-06-05 14:37:58 by simonm]
Initial revision
Diffstat (limited to 'ghc/rts/gmp/mpn')
-rw-r--r-- | ghc/rts/gmp/mpn/x86/pentium/add_n.S | 130 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/pentium/addmul_1.S | 83 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/pentium/lshift.S | 217 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/pentium/mul_1.S | 79 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/pentium/rshift.S | 217 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/pentium/sub_n.S | 130 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/pentium/submul_1.S | 83 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/submul_1.S | 76 | ||||
-rw-r--r-- | ghc/rts/gmp/mpn/x86/syntax.h | 62 |
9 files changed, 1077 insertions, 0 deletions
diff --git a/ghc/rts/gmp/mpn/x86/pentium/add_n.S b/ghc/rts/gmp/mpn/x86/pentium/add_n.S new file mode 100644 index 0000000000..ac6f2819b2 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/pentium/add_n.S @@ -0,0 +1,130 @@ +/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store + sum in a third limb vector. + +Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_add_n) +C_SYMBOL_NAME(__mpn_add_n:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + adcl %ebx,%eax + movl 4(%ebp),%ebx + adcl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + adcl %ebx,%eax + movl 12(%ebp),%ebx + adcl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + adcl %ebx,%eax + movl 20(%ebp),%ebx + adcl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %ebx,%eax + movl 28(%ebp),%ebx + adcl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + adcl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + adcl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/ghc/rts/gmp/mpn/x86/pentium/addmul_1.S b/ghc/rts/gmp/mpn/x86/pentium/addmul_1.S new file mode 100644 index 0000000000..7cfa5db687 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/pentium/addmul_1.S @@ -0,0 +1,83 @@ +/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_addmul_1) +C_SYMBOL_NAME(__mpn_addmul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) + INSN2(add,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/ghc/rts/gmp/mpn/x86/pentium/lshift.S b/ghc/rts/gmp/mpn/x86/pentium/lshift.S new file mode 100644 index 0000000000..b298983563 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/pentium/lshift.S @@ -0,0 +1,217 @@ +/* Pentium optimized __mpn_lshift -- + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_lshift) +C_SYMBOL_NAME(__mpn_lshift:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Lnormal + leal 4(%esi),%eax + cmpl %edi,%eax + jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */ + leal (%esi,%ebp,4),%eax + cmpl %eax,%edi + jnc Lspecial /* jump if res_ptr >= s_ptr + size */ + +Lnormal: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + xorl %eax,%eax + shldl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz Lend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Loop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + shldl %cl,%ebx,%edx + shldl %cl,%eax,%ebx + movl %edx,-8(%edi) + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + shldl %cl,%edx,%eax + shldl %cl,%ebx,%edx + movl %eax,-16(%edi) + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,-24(%edi) + movl %eax,-28(%edi) + + subl $32,%esi + subl $32,%edi + decl %ebp + jnz Loop + +Lend: popl %ebp + andl $7,%ebp + jz Lend2 +Loop2: movl (%esi),%eax + shldl %cl,%eax,%edx + movl %edx,(%edi) + movl %eax,%edx + subl $4,%esi + subl $4,%edi + decl %ebp + jnz Loop2 + +Lend2: shll %cl,%edx /* compute least significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Lspecial: + movl (%esi),%edx + addl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + addl %edx,%edx + incl %ebp + decl %ebp + jz LLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +LLoop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + adcl %eax,%eax + movl %ebx,(%edi) + adcl %edx,%edx + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + adcl %ebx,%ebx + movl %edx,8(%edi) + adcl %eax,%eax + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + adcl %edx,%edx + movl %eax,16(%edi) + adcl %ebx,%ebx + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %eax,%eax + movl %ebx,24(%edi) + adcl %edx,%edx + movl %eax,28(%edi) + + leal 32(%esi),%esi /* use leal not to clobber carry */ + leal 32(%edi),%edi + decl %ebp + jnz LLoop + +LLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz LLend2 + addl %eax,%eax /* restore carry from eax */ +LLoop2: movl %edx,%ebx + movl (%esi),%edx + adcl %edx,%edx + movl %ebx,(%edi) + + leal 4(%esi),%esi /* use leal not to clobber carry */ + leal 4(%edi),%edi + decl %ebp + jnz LLoop2 + + jmp LL1 +LLend2: addl %eax,%eax /* restore carry from eax */ +LL1: movl %edx,(%edi) /* store last limb */ + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/ghc/rts/gmp/mpn/x86/pentium/mul_1.S b/ghc/rts/gmp/mpn/x86/pentium/mul_1.S new file mode 100644 index 0000000000..4ac3050a61 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/pentium/mul_1.S @@ -0,0 +1,79 @@ +/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_mul_1) +C_SYMBOL_NAME(__mpn_mul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/ghc/rts/gmp/mpn/x86/pentium/rshift.S b/ghc/rts/gmp/mpn/x86/pentium/rshift.S new file mode 100644 index 0000000000..38398edb13 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/pentium/rshift.S @@ -0,0 +1,217 @@ +/* Pentium optimized __mpn_rshift -- + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + size (sp + 12) + cnt (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_rshift) +C_SYMBOL_NAME(__mpn_rshift:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Lnormal + leal 4(%edi),%eax + cmpl %esi,%eax + jnc Lspecial /* jump if res_ptr + 1 >= s_ptr */ + leal (%edi,%ebp,4),%eax + cmpl %eax,%esi + jnc Lspecial /* jump if s_ptr >= res_ptr + size */ + +Lnormal: + movl (%esi),%edx + addl $4,%esi + xorl %eax,%eax + shrdl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz Lend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + shrdl %cl,%ebx,%edx + shrdl %cl,%eax,%ebx + movl %edx,8(%edi) + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + shrdl %cl,%edx,%eax + shrdl %cl,%ebx,%edx + movl %eax,16(%edi) + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,24(%edi) + movl %eax,28(%edi) + + addl $32,%esi + addl $32,%edi + decl %ebp + jnz Loop + +Lend: popl %ebp + andl $7,%ebp + jz Lend2 +Loop2: movl (%esi),%eax + shrdl %cl,%eax,%edx /* compute result limb */ + movl %edx,(%edi) + movl %eax,%edx + addl $4,%esi + addl $4,%edi + decl %ebp + jnz Loop2 + +Lend2: shrl %cl,%edx /* compute most significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Lspecial: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + shrl $1,%edx + incl %ebp + decl %ebp + jz LLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +LLoop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + rcrl $1,%eax + movl %ebx,(%edi) + rcrl $1,%edx + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + rcrl $1,%ebx + movl %edx,-8(%edi) + rcrl $1,%eax + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + rcrl $1,%edx + movl %eax,-16(%edi) + rcrl $1,%ebx + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + rcrl $1,%eax + movl %ebx,-24(%edi) + rcrl $1,%edx + movl %eax,-28(%edi) + + leal -32(%esi),%esi /* use leal not to clobber carry */ + leal -32(%edi),%edi + decl %ebp + jnz LLoop + +LLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz LLend2 + addl %eax,%eax /* restore carry from eax */ +LLoop2: movl %edx,%ebx + movl (%esi),%edx + rcrl $1,%edx + movl %ebx,(%edi) + + leal -4(%esi),%esi /* use leal not to clobber carry */ + leal -4(%edi),%edi + decl %ebp + jnz LLoop2 + + jmp LL1 +LLend2: addl %eax,%eax /* restore carry from eax */ +LL1: movl %edx,(%edi) /* store last limb */ + + movl $0,%eax + rcrl $1,%eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/ghc/rts/gmp/mpn/x86/pentium/sub_n.S b/ghc/rts/gmp/mpn/x86/pentium/sub_n.S new file mode 100644 index 0000000000..d1a2bc0840 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/pentium/sub_n.S @@ -0,0 +1,130 @@ +/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0 + and store difference in a third limb vector. + +Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(__mpn_sub_n) +C_SYMBOL_NAME(__mpn_sub_n:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + sbbl %ebx,%eax + movl 4(%ebp),%ebx + sbbl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + sbbl %ebx,%eax + movl 12(%ebp),%ebx + sbbl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + sbbl %ebx,%eax + movl 20(%ebp),%ebx + sbbl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + sbbl %ebx,%eax + movl 28(%ebp),%ebx + sbbl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + sbbl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + sbbl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/ghc/rts/gmp/mpn/x86/pentium/submul_1.S b/ghc/rts/gmp/mpn/x86/pentium/submul_1.S new file mode 100644 index 0000000000..adf2d63e68 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/pentium/submul_1.S @@ -0,0 +1,83 @@ +/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) + INSN2(sub,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/ghc/rts/gmp/mpn/x86/submul_1.S b/ghc/rts/gmp/mpn/x86/submul_1.S new file mode 100644 index 0000000000..730e732045 --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/submul_1.S @@ -0,0 +1,76 @@ +/* i80386 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + +Copyright (C) 1992, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(__mpn_submul_1) +C_SYMBOL_NAME(__mpn_submul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) + INSN2(add,l ,R(eax),R(ebx)) + INSN2(adc,l ,R(edx),$0) + INSN2(sub,l ,MEM_INDEX(res_ptr,size,4),R(eax)) + INSN2(adc,l ,R(edx),$0) + INSN2(mov,l ,R(ebx),R(edx)) + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) + + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret diff --git a/ghc/rts/gmp/mpn/x86/syntax.h b/ghc/rts/gmp/mpn/x86/syntax.h new file mode 100644 index 0000000000..c53c73c03f --- /dev/null +++ b/ghc/rts/gmp/mpn/x86/syntax.h @@ -0,0 +1,62 @@ +/* asm.h -- Definitions for x86 syntax variations. + +Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#undef ALIGN + +#if defined (BSD_SYNTAX) || defined (ELF_SYNTAX) +#define R(r) %r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)displacement(R(base)) +#define MEM_INDEX(base,index,size)(R(base),R(index),size) +#ifdef __STDC__ +#define INSN1(mnemonic,size_suffix,dst)mnemonic##size_suffix dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic##size_suffix src,dst +#else +#define INSN1(mnemonic,size_suffix,dst)mnemonic/**/size_suffix dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic/**/size_suffix src,dst +#endif +#define TEXT .text +#if defined (BSD_SYNTAX) +#define ALIGN(log) .align log +#endif +#if defined (ELF_SYNTAX) +#define ALIGN(log) .align 1<<(log) +#endif +#define GLOBL .globl +#endif + +#ifdef INTEL_SYNTAX +#define R(r) r +#define MEM(base)[base] +#define MEM_DISP(base,displacement)[base+(displacement)] +#define MEM_INDEX(base,index,size)[base+index*size] +#define INSN1(mnemonic,size_suffix,dst)mnemonic dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic dst,src +#define TEXT .text +#define ALIGN(log) .align log +#define GLOBL .globl +#endif + +#ifdef BROKEN_ALIGN +#undef ALIGN +#define ALIGN(log) .align log,0x90 +#endif |