diff options
Diffstat (limited to 'rts/gmp/mpn/powerpc64')
-rw-r--r-- | rts/gmp/mpn/powerpc64/README | 36 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/add_n.asm | 61 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/addmul_1.asm | 52 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/addsub_n.asm | 107 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/aix.m4 | 40 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/copyd.asm | 45 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/copyi.asm | 44 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/gmp-mparam.h | 62 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/lshift.asm | 159 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/mul_1.asm | 49 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/rshift.asm | 60 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/sub_n.asm | 61 | ||||
-rw-r--r-- | rts/gmp/mpn/powerpc64/submul_1.asm | 54 |
13 files changed, 830 insertions, 0 deletions
diff --git a/rts/gmp/mpn/powerpc64/README b/rts/gmp/mpn/powerpc64/README new file mode 100644 index 0000000000..c779276917 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/README @@ -0,0 +1,36 @@ +PPC630 (aka Power3) pipeline information: + +Decoding is 4-way and issue is 8-way with some out-of-order capability. +LS1 - ld/st unit 1 +LS2 - ld/st unit 2 +FXU1 - integer unit 1, handles any simple integer instructions +FXU2 - integer unit 2, handles any simple integer instructions +FXU3 - integer unit 3, handles integer multiply and divide +FPU1 - floating-point unit 1 +FPU2 - floating-point unit 2 + +Memory: Any two memory operations can issue, but memory subsystem + can sustain just one store per cycle. +Simple integer: 2 operations (such as add, rl*) +Integer multiply: 1 operation every 9th cycle worst case; exact timing depends + on 2nd operand most significant bit position (10 bits per + cycle). Multiply unit is not pipelined, only one multiply + operation in progress is allowed. +Integer divide: ? +Floating-point: Any 2 plain arithmetic instructions (such as fmul, fadd, fmadd) + Latency = 4. +Floating-point divide: + ? +Floating-point square root: + ? + +Best possible times for the main loops: +shift: 1.5 cycles, limited by integer unit contention. + With 63 special loops, one for each shift count, we could + reduce the needed integer instructions to 2, which would + reduce the best possible time to 1 cycle. +add/sub: 1.5 cycles, limited by ld/st unit contention. +mul: 18 cycles (average) unless floating-point operations are used, + but that would only help for multiplies of perhaps 10 or more + limbs. +addmul/submul: Same situation as for mul. diff --git a/rts/gmp/mpn/powerpc64/add_n.asm b/rts/gmp/mpn/powerpc64/add_n.asm new file mode 100644 index 0000000000..c3325376dc --- /dev/null +++ b/rts/gmp/mpn/powerpc64/add_n.asm @@ -0,0 +1,61 @@ +# PowerPC-64 mpn_add_n -- Add two limb vectors of the same length > 0 and +# store sum in a third limb vector. 
+ +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_add_n) + mtctr r6 # copy size into CTR + addic r0,r0,0 # clear cy + ld r8,0(r4) # load least significant s1 limb + ld r0,0(r5) # load least significant s2 limb + addi r3,r3,-8 # offset res_ptr, it's updated before it's used + bdz .Lend # decrement CTR; if size was 1, skip loop +.Loop: ld r9,8(r4) # load s1 limb + ld r10,8(r5) # load s2 limb + adde r7,r0,r8 # add limbs with cy, set cy + std r7,8(r3) # store result limb + bdz .Lexit # decrement CTR and exit if done + ldu r8,16(r4) # load s1 limb and update s1_ptr + ldu r0,16(r5) # load s2 limb and update s2_ptr + adde r7,r10,r9 # add limbs with cy, set cy + stdu r7,16(r3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + +.Lend: adde r7,r0,r8 # add the limbs still pending in r0/r8, set cy + std r7,8(r3) # store ultimate result limb + li r3,0 # load cy into ... + addze r3,r3 # ... return value register + blr +.Lexit: adde r7,r10,r9 # add the limbs still pending in r10/r9, set cy + std r7,16(r3) # store ultimate result limb + li r3,0 # load cy into ... + addze r3,r3 # ... 
return value register + blr +EPILOGUE(mpn_add_n) diff --git a/rts/gmp/mpn/powerpc64/addmul_1.asm b/rts/gmp/mpn/powerpc64/addmul_1.asm new file mode 100644 index 0000000000..81774482fe --- /dev/null +++ b/rts/gmp/mpn/powerpc64/addmul_1.asm @@ -0,0 +1,52 @@ +# PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_addmul_1) + mtctr 5 # copy size into CTR + li 9,0 # cy_limb = 0 + addic 0,0,0 # clear cy + cal 3,-8(3) # offset res_ptr, it's updated before it's used + cal 4,-8(4) # offset s1_ptr, it's updated before it's used +.Loop: + ldu 0,8(4) # load s1 limb and update s1_ptr + ld 10,8(3) # load res limb + mulld 7,0,6 # low half of s1 limb * s2_limb + adde 7,7,9 # add cy_limb and cy, set cy + mulhdu 9,0,6 # cy_limb = high half of s1 limb * s2_limb + addze 9,9 # cy_limb += cy, set cy + addc 7,7,10 # add res limb, set cy (consumed by next adde) + stdu 7,8(3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + + addze 3,9 # return cy_limb + cy + blr +EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/powerpc64/addsub_n.asm b/rts/gmp/mpn/powerpc64/addsub_n.asm new file mode 100644 index 0000000000..4ed40d71ae --- /dev/null +++ b/rts/gmp/mpn/powerpc64/addsub_n.asm @@ -0,0 +1,107 @@ +# PowerPC-64 mpn_addsub_n -- Simultaneous add and sub. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. 
+ +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# r1p r3, r2p r4 (sum and difference are stored here) +# s1p r5 +# s2p r6 +# n r7 (registers as the code below actually uses them) + +include(`asm-syntax.m4') + +define(SAVE_BORROW_RESTORE_CARRY, + `sldi $1,$1,63 + adde $1,$1,$1') +define(SAVE_CARRY_RESTORE_BORROW, + `sldi $1,$1,63 + adde $1,$1,$1') + +# 19991117 + +# This is just crafted for testing some ideas, and verifying that we can make +# it run fast. It runs at 2.55 cycles/limb on the 630, which is very good. +# We should play a little with the schedule. No time has been spent on that. + +# To finish this, the loop warm up and cool down code needs to be written, +# and the result needs to be tested. Also, the proper calling sequence should +# be used. 
+ +# r1p r2p s1p s2p n +# Use reg r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r14, r15, r16, r17 + +ASM_START() +PROLOGUE(mpn_addsub_n) + std r14,-64(1) # save r14-r19 at negative offsets from r1 + std r15,-56(1) + std r16,-48(1) + std r17,-40(1) + std r18,-32(1) + std r19,-24(1) + + srdi r7,r7,2 # n / 4, each loop pass handles 4 limbs + mtctr r7 # copy loop count (n / 4) into CTR + addic r0,r0,0 # clear cy + addi r3,r3,-8 # offset r1p, it's updated before it's used + addi r4,r4,-8 # offset r2p, it's updated before it's used + +.Loop: + adde r12,r8,r9 # sum limb 1 + std r12,8(r3) + adde r12,r10,r11 # sum limb 2 + std r12,16(r3) + + SAVE_CARRY_RESTORE_BORROW(r0) # swaps CA with bit 0 of r0 + + subfe r12,r8,r9 # NOTE(review): this is r9 - r8, i.e. s2 - s1 -- confirm intended + std r12,8(r4) + ld r8,8(r5) # s1 L 1 + ld r9,8(r6) # s2 L 1 + subfe r12,r10,r11 + std r12,16(r4) + ld r10,16(r5) # s1 L 2 + ld r11,16(r6) # s2 L 2 +# pair ------------------------- + subfe r12,r14,r15 + std r12,24(r4) + subfe r12,r16,r17 + stdu r12,32(r4) # store difference limb 4 and update r2p + + SAVE_BORROW_RESTORE_CARRY(r0) # swaps CA with bit 0 of r0 + + adde r12,r14,r15 # sum limb 3 + std r12,24(r3) + ld r14,24(r5) # s1 L 3 + ld r15,24(r6) # s2 L 3 + adde r12,r16,r17 # sum limb 4 + stdu r12,32(r3) # store sum limb 4 and update r1p + ldu r16,32(r5) # s1 L 4 + ldu r17,32(r6) # s2 L 4 + bdnz .Loop # decrement CTR and loop back + + ld r14,-64(1) # restore r14-r19 + ld r15,-56(1) + ld r16,-48(1) + ld r17,-40(1) + ld r18,-32(1) + ld r19,-24(1) + blr +EPILOGUE(mpn_addsub_n) diff --git a/rts/gmp/mpn/powerpc64/aix.m4 b/rts/gmp/mpn/powerpc64/aix.m4 new file mode 100644 index 0000000000..aee9f1f97a --- /dev/null +++ b/rts/gmp/mpn/powerpc64/aix.m4 @@ -0,0 +1,40 @@ +divert(-1) +dnl m4 macros for AIX 64-bit assembly. + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + +define(`ASM_START', + `.machine "ppc64" + .toc') + +define(`PROLOGUE', + ` + .globl $1 + .globl .$1 + .csect $1[DS],3 +$1: + .llong .$1, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.$1:') + +define(`EPILOGUE', `') + +divert diff --git a/rts/gmp/mpn/powerpc64/copyd.asm b/rts/gmp/mpn/powerpc64/copyd.asm new file mode 100644 index 0000000000..d06e8c25fd --- /dev/null +++ b/rts/gmp/mpn/powerpc64/copyd.asm @@ -0,0 +1,45 @@ +# PowerPC-64 mpn_copyd -- Copy a limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# rptr r3 +# sptr r4 +# n r5 + +include(`../config.m4') + +# Unrolling this analogous to sparc64/copyi.s doesn't help for any +# operand sizes. + +ASM_START() +PROLOGUE(mpn_copyd) + cmpdi cr0,r5,0 # n == 0? + mtctr r5 # copy n into CTR + sldi r5,r5,3 # n limbs -> byte count + add r4,r4,r5 # point sptr just past the last source limb + add r3,r3,r5 # point rptr just past the last result limb + beq cr0,.Lend # nothing to copy when n == 0 +.Loop: ldu r0,-8(r4) # load limb, stepping sptr downwards + stdu r0,-8(r3) # store limb, stepping rptr downwards + bdnz .Loop # decrement CTR and loop back +.Lend: blr +EPILOGUE(mpn_copyd) diff --git a/rts/gmp/mpn/powerpc64/copyi.asm b/rts/gmp/mpn/powerpc64/copyi.asm new file mode 100644 index 0000000000..a1bedc4c5b --- /dev/null +++ b/rts/gmp/mpn/powerpc64/copyi.asm @@ -0,0 +1,44 @@ +# PowerPC-64 mpn_copyi -- Copy a limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# rptr r3 +# sptr r4 +# n r5 + +include(`../config.m4') + +# Unrolling this analogous to sparc64/copyi.s doesn't help for any +# operand sizes. 
+ +ASM_START() +PROLOGUE(mpn_copyi) + cmpdi cr0,r5,0 + mtctr r5 + addi r4,r4,-8 + addi r3,r3,-8 + beq cr0,.Lend +.Loop: ldu r0,8(r4) + stdu r0,8(r3) + bdnz .Loop +.Lend: blr +EPILOGUE(mpn_copyi) diff --git a/rts/gmp/mpn/powerpc64/gmp-mparam.h b/rts/gmp/mpn/powerpc64/gmp-mparam.h new file mode 100644 index 0000000000..6fefb960cd --- /dev/null +++ b/rts/gmp/mpn/powerpc64/gmp-mparam.h @@ -0,0 +1,62 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1995, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* Generated by tuneup.c, 2000-07-16. 
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 10 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 57 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 16 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 89 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 28 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 216 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 14 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 6 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 163 +#endif diff --git a/rts/gmp/mpn/powerpc64/lshift.asm b/rts/gmp/mpn/powerpc64/lshift.asm new file mode 100644 index 0000000000..cef3a81fdd --- /dev/null +++ b/rts/gmp/mpn/powerpc64/lshift.asm @@ -0,0 +1,159 @@ +# PowerPC-64 mpn_lshift -- Shift a number left. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_lshift) + cmpdi cr0,r5,20 # more than 20 limbs? 
+ sldi r0,r5,3 + add r4,r4,r0 # make r4 point at end of s1 + add r7,r3,r0 # make r7 point at end of res + bgt .LBIG # branch if more than 20 limbs + + mtctr r5 # copy size into CTR + subfic r8,r6,64 # r8 = 64 - cnt + ldu r11,-8(r4) # load first s1 limb + srd r3,r11,r8 # compute function return value + bdz .Lend1 # decrement CTR; if size was 1, skip loop + +.Loop: ldu r10,-8(r4) # load next s1 limb and update s1_ptr + sld r9,r11,r6 + srd r12,r10,r8 + or r9,r9,r12 + stdu r9,-8(r7) # store result limb and update res pointer + bdz .Lend2 # decrement CTR and exit if done + ldu r11,-8(r4) # load next s1 limb and update s1_ptr + sld r9,r10,r6 + srd r12,r11,r8 + or r9,r9,r12 + stdu r9,-8(r7) # store result limb and update res pointer + bdnz .Loop # decrement CTR and loop back + +.Lend1: sld r0,r11,r6 + std r0,-8(r7) # store last result limb + blr +.Lend2: sld r0,r10,r6 + std r0,-8(r7) # store last result limb + blr + +.LBIG: + std r24,-64(1) # save r24-r31 at negative offsets from r1 + std r25,-56(1) + std r26,-48(1) + std r27,-40(1) + std r28,-32(1) + std r29,-24(1) + std r30,-16(1) + std r31,-8(1) + ldu r9,-8(r4) # load first s1 limb + subfic r8,r6,64 # r8 = 64 - cnt + srd r3,r9,r8 # compute function return value + sld r0,r9,r6 + addi r5,r5,-1 # one limb has been loaded already + + andi. r10,r5,3 # count for spill loop + beq .Le + mtctr r10 + ldu r28,-8(r4) + bdz .Lxe0 + +.Loop0: sld r12,r28,r6 + srd r24,r28,r8 + ldu r28,-8(r4) + or r24,r0,r24 + stdu r24,-8(r7) + mr r0,r12 + bdnz .Loop0 # taken at most once! 
+ +.Lxe0: sld r12,r28,r6 + srd r24,r28,r8 + or r24,r0,r24 + stdu r24,-8(r7) + mr r0,r12 + +.Le: srdi r5,r5,2 # count for unrolled loop + addi r5,r5,-1 # the last group of 4 limbs is handled after the loop + mtctr r5 + ld r28,-8(r4) + ld r29,-16(r4) + ld r30,-24(r4) + ldu r31,-32(r4) + +.LoopU: sld r9,r28,r6 + srd r24,r28,r8 + ld r28,-8(r4) + sld r10,r29,r6 + srd r25,r29,r8 + ld r29,-16(r4) + sld r11,r30,r6 + srd r26,r30,r8 + ld r30,-24(r4) + sld r12,r31,r6 + srd r27,r31,r8 + ldu r31,-32(r4) + or r24,r0,r24 + std r24,-8(r7) + or r25,r9,r25 + std r25,-16(r7) + or r26,r10,r26 + std r26,-24(r7) + or r27,r11,r27 + stdu r27,-32(r7) + mr r0,r12 # keep the left-shifted limb for the next OR + bdnz .LoopU # decrement CTR and loop back + + sld r9,r28,r6 + srd r24,r28,r8 + sld r10,r29,r6 + srd r25,r29,r8 + sld r11,r30,r6 + srd r26,r30,r8 + sld r12,r31,r6 + srd r27,r31,r8 + or r24,r0,r24 + std r24,-8(r7) + or r25,r9,r25 + std r25,-16(r7) + or r26,r10,r26 + std r26,-24(r7) + or r27,r11,r27 + stdu r27,-32(r7) + mr r0,r12 + + std r0,-8(r7) # store last result limb + ld r24,-64(1) # restore r24-r31 + ld r25,-56(1) + ld r26,-48(1) + ld r27,-40(1) + ld r28,-32(1) + ld r29,-24(1) + ld r30,-16(1) + ld r31,-8(1) + blr +EPILOGUE(mpn_lshift) diff --git a/rts/gmp/mpn/powerpc64/mul_1.asm b/rts/gmp/mpn/powerpc64/mul_1.asm new file mode 100644 index 0000000000..47597283ff --- /dev/null +++ b/rts/gmp/mpn/powerpc64/mul_1.asm @@ -0,0 +1,49 @@ +# PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_mul_1) + mtctr 5 # copy size into CTR + li 9,0 # cy_limb = 0 + addic 0,0,0 # clear cy + cal 3,-8(3) # offset res_ptr, it's updated before it's used + cal 4,-8(4) # offset s1_ptr, it's updated before it's used +.Loop: + ldu 0,8(4) # load s1 limb and update s1_ptr + mulld 7,0,6 # low half of s1 limb * s2_limb + adde 7,7,9 # add cy_limb and cy, set cy (consumed by next adde) + mulhdu 9,0,6 # cy_limb = high half of s1 limb * s2_limb + stdu 7,8(3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + + addze 3,9 # return cy_limb + cy + blr +EPILOGUE(mpn_mul_1) diff --git a/rts/gmp/mpn/powerpc64/rshift.asm b/rts/gmp/mpn/powerpc64/rshift.asm new file mode 100644 index 0000000000..88272c7fa9 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/rshift.asm @@ -0,0 +1,60 @@ +# PowerPC-64 mpn_rshift -- Shift a number right. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_rshift) + mtctr r5 # copy size into CTR + addi r7,r3,-8 # move adjusted res_ptr to free return reg + subfic r8,r6,64 # r8 = 64 - cnt + ld r11,0(r4) # load first s1 limb + sld r3,r11,r8 # compute function return value + bdz .Lend1 # decrement CTR; if size was 1, skip loop + +.Loop: ldu r10,8(r4) # load next s1 limb and update s1_ptr + srd r9,r11,r6 + sld r12,r10,r8 + or r9,r9,r12 + stdu r9,8(r7) # store result limb and update res pointer + bdz .Lend2 # decrement CTR and exit if done + ldu r11,8(r4) # load next s1 limb and update s1_ptr + srd r9,r10,r6 + sld r12,r11,r8 + or r9,r9,r12 + stdu r9,8(r7) # store result limb and update res pointer + bdnz .Loop # decrement CTR and loop back + +.Lend1: srd r0,r11,r6 + std r0,8(r7) # store last result limb + blr + +.Lend2: srd r0,r10,r6 + std r0,8(r7) # store last result limb + blr +EPILOGUE(mpn_rshift) diff --git a/rts/gmp/mpn/powerpc64/sub_n.asm b/rts/gmp/mpn/powerpc64/sub_n.asm new file mode 100644 index 0000000000..4de3de69c7 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/sub_n.asm @@ -0,0 +1,61 @@ +# PowerPC-64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 +# and store difference in a third limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_sub_n) + mtctr r6 # copy size into CTR + addic r0,r6,-1 # set cy (size > 0, so size + -1 carries out) + ld r8,0(r4) # load least significant s1 limb + ld r0,0(r5) # load least significant s2 limb + addi r3,r3,-8 # offset res_ptr, it's updated before it's used + bdz .Lend # decrement CTR; if size was 1, skip loop +.Loop: ld r9,8(r4) # load s1 limb + ld r10,8(r5) # load s2 limb + subfe r7,r0,r8 # subtract limbs with cy, set cy + std r7,8(r3) # store result limb + bdz .Lexit # decrement CTR and exit if done + ldu r8,16(r4) # load s1 limb and update s1_ptr + ldu r0,16(r5) # load s2 limb and update s2_ptr + subfe r7,r10,r9 # subtract limbs with cy, set cy + stdu r7,16(r3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + +.Lend: subfe r7,r0,r8 # subtract the limbs still pending in r0/r8, set cy + std r7,8(r3) # store ultimate result limb + subfe r3,r0,r0 # load !cy into ... + subfic r3,r3,0 # ... return value register + blr +.Lexit: subfe r7,r10,r9 # subtract the limbs still pending in r10/r9, set cy + std r7,16(r3) # store ultimate result limb + subfe r3,r0,r0 # load !cy into ... + subfic r3,r3,0 # ... return value register + blr +EPILOGUE(mpn_sub_n) diff --git a/rts/gmp/mpn/powerpc64/submul_1.asm b/rts/gmp/mpn/powerpc64/submul_1.asm new file mode 100644 index 0000000000..17f6369a38 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/submul_1.asm @@ -0,0 +1,54 @@ +# PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. 
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_submul_1) + mtctr 5 # copy size into CTR + li 9,0 # cy_limb = 0 + addic 0,0,0 # clear cy + cal 3,-8(3) # offset res_ptr, it's updated before it's used + cal 4,-8(4) # offset s1_ptr, it's updated before it's used +.Loop: + ldu 0,8(4) # load s1 limb and update s1_ptr + ld 10,8(3) # load res limb + mulld 7,0,6 # low half of s1 limb * s2_limb + adde 7,7,9 # add cy_limb and cy, set cy + mulhdu 9,0,6 # cy_limb = high half of s1 limb * s2_limb + addze 9,9 # cy_limb += cy + subfc 7,7,10 # res limb - product limb, cy = no borrow + stdu 7,8(3) # store result limb and update res_ptr + subfe 11,11,11 # invert ... + addic 11,11,1 # ... carry + bdnz .Loop # decrement CTR and loop back + + addze 3,9 # return cy_limb + cy (cy is the final borrow) + blr +EPILOGUE(mpn_submul_1) |