summaryrefslogtreecommitdiff
path: root/rts/gmp/mpn/powerpc64
diff options
context:
space:
mode:
Diffstat (limited to 'rts/gmp/mpn/powerpc64')
-rw-r--r--rts/gmp/mpn/powerpc64/README36
-rw-r--r--rts/gmp/mpn/powerpc64/add_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc64/addmul_1.asm52
-rw-r--r--rts/gmp/mpn/powerpc64/addsub_n.asm107
-rw-r--r--rts/gmp/mpn/powerpc64/aix.m440
-rw-r--r--rts/gmp/mpn/powerpc64/copyd.asm45
-rw-r--r--rts/gmp/mpn/powerpc64/copyi.asm44
-rw-r--r--rts/gmp/mpn/powerpc64/gmp-mparam.h62
-rw-r--r--rts/gmp/mpn/powerpc64/lshift.asm159
-rw-r--r--rts/gmp/mpn/powerpc64/mul_1.asm49
-rw-r--r--rts/gmp/mpn/powerpc64/rshift.asm60
-rw-r--r--rts/gmp/mpn/powerpc64/sub_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc64/submul_1.asm54
13 files changed, 830 insertions, 0 deletions
diff --git a/rts/gmp/mpn/powerpc64/README b/rts/gmp/mpn/powerpc64/README
new file mode 100644
index 0000000000..c779276917
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/README
@@ -0,0 +1,36 @@
+PPC630 (aka Power3) pipeline information:
+
+Decoding is 4-way and issue is 8-way with some out-of-order capability.
+LS1 - ld/st unit 1
+LS2 - ld/st unit 2
+FXU1 - integer unit 1, handles any simple integer instructions
+FXU2 - integer unit 2, handles any simple integer instructions
+FXU3 - integer unit 3, handles integer multiply and divide
+FPU1 - floating-point unit 1
+FPU2 - floating-point unit 2
+
+Memory: Any two memory operations can issue, but memory subsystem
+ can sustain just one store per cycle.
+Simple integer: 2 operations (such as add, rl*)
+Integer multiply: 1 operation every 9th cycle worst case; exact timing depends
+ on 2nd operand most significant bit position (10 bits per
+ cycle). Multiply unit is not pipelined, only one multiply
+ operation in progress is allowed.
+Integer divide: ?
+Floating-point: Any 2 plain arithmetic instructions (such as fmul, fadd, fmadd)
+ Latency = 4.
+Floating-point divide:
+ ?
+Floating-point square root:
+ ?
+
+Best possible times for the main loops:
+shift: 1.5 cycles limited by integer unit contention.
+ With 63 special loops, one for each shift count, we could
+ reduce the needed integer instructions to 2, which would
+ reduce the best possible time to 1 cycle.
+add/sub: 1.5 cycles, limited by ld/st unit contention.
+mul: 18 cycles (average) unless floating-point operations are used,
+ but that would only help for multiplies of perhaps 10 and more
+ limbs.
+addmul/submul: Same situation as for mul.
diff --git a/rts/gmp/mpn/powerpc64/add_n.asm b/rts/gmp/mpn/powerpc64/add_n.asm
new file mode 100644
index 0000000000..c3325376dc
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/add_n.asm
@@ -0,0 +1,61 @@
+# PowerPC-64 mpn_add_n -- Add two limb vectors of the same length > 0 and
+# store sum in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`../config.m4')
+
+ASM_START()
+# mpn_add_n: add the size-limb vectors at s1_ptr (r4) and s2_ptr (r5), store
+# the sums at res_ptr (r3) and return the final carry (0 or 1) in r3.
+# The loop is unrolled twice and software-pipelined: each half adds the limb
+# pair loaded by the previous half while loading the next pair, so there are
+# two exits depending on which register pair holds the last limbs
+# (.Lend: r0/r8, .Lexit: r10/r9).  CTR is loaded with size and decremented
+# before it is tested, so size must be > 0 as stated in the file header.
+PROLOGUE(mpn_add_n)
+ mtctr r6 # copy size into CTR
+ addic r0,r0,0 # clear cy (adding 0 cannot carry; r0 is reloaded below)
+ ld r8,0(r4) # load least significant s1 limb
+ ld r0,0(r5) # load least significant s2 limb
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ bdz .Lend # If done (size == 1), skip loop
+.Loop: ld r9,8(r4) # load s1 limb
+ ld r10,8(r5) # load s2 limb
+ adde r7,r0,r8 # add limbs with cy, set cy
+ std r7,8(r3) # store result limb (res_ptr not updated here)
+ bdz .Lexit # decrement CTR and exit if done
+ ldu r8,16(r4) # load s1 limb and update s1_ptr
+ ldu r0,16(r5) # load s2 limb and update s2_ptr
+ adde r7,r10,r9 # add limbs with cy, set cy
+ stdu r7,16(r3) # store result limb and update res_ptr
+ bdnz .Loop # decrement CTR and loop back
+
+# Exit with the last limb pair pending in r0/r8.
+.Lend: adde r7,r0,r8
+ std r7,8(r3) # store ultimate result limb
+ li r3,0 # load cy into ...
+ addze r3,r3 # ... return value register
+ blr
+# Exit with the last limb pair pending in r10/r9; res_ptr still lags by one
+# limb because the preceding std did not update it, hence the offset of 16.
+.Lexit: adde r7,r10,r9
+ std r7,16(r3) # store ultimate result limb
+ li r3,0 # load cy into ...
+ addze r3,r3 # ... return value register
+ blr
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/powerpc64/addmul_1.asm b/rts/gmp/mpn/powerpc64/addmul_1.asm
new file mode 100644
index 0000000000..81774482fe
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/addmul_1.asm
@@ -0,0 +1,52 @@
+# PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+# mpn_addmul_1: multiply the size-limb vector at s1_ptr (r4) by s2_limb (r6),
+# add the product limb by limb into the vector at res_ptr (r3), and return
+# the final carry limb in r3.
+# Two carries travel between iterations: r9 holds the high product limb, and
+# the CA bit holds the overflow of the res-limb addition (addc), which the
+# next iteration's adde picks up.
+PROLOGUE(mpn_addmul_1)
+ mtctr 5 # CTR = size
+ li 9,0 # cy_limb = 0
+ addic 0,0,0 # clear CA (adding 0 cannot carry)
+ cal 3,-8(3) # res_ptr -= 8; stdu below pre-increments it (cal is the POWER mnemonic for addi)
+ cal 4,-8(4) # s1_ptr -= 8; ldu below pre-increments it
+.Loop:
+ ldu 0,8(4) # r0 = next s1 limb, advance s1_ptr
+ ld 10,8(3) # r10 = corresponding res limb
+ mulld 7,0,6 # r7 = low 64 bits of s1 limb * s2_limb
+ adde 7,7,9 # add previous cy_limb and the CA left by the previous addc
+ mulhdu 9,0,6 # r9 = high 64 bits of s1 limb * s2_limb
+ addze 9,9 # fold the carry of the adde into the new cy_limb
+ addc 7,7,10 # add the res limb; its carry stays in CA for the next pass
+ stdu 7,8(3) # store result limb, advance res_ptr
+ bdnz .Loop
+
+ addze 3,9 # return cy_limb plus the last pending CA
+ blr
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/powerpc64/addsub_n.asm b/rts/gmp/mpn/powerpc64/addsub_n.asm
new file mode 100644
index 0000000000..4ed40d71ae
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/addsub_n.asm
@@ -0,0 +1,107 @@
+# PowerPC-64 mpn_addsub_n -- Simultaneous add and sub.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS (as used by the code below)
+# r1_ptr r3 (receives the sums)
+# r2_ptr r4 (receives the differences)
+# s1_ptr r5
+# s2_ptr r6
+# size r7
+
+include(`asm-syntax.m4')
+
+# Both macros expand to the same two instructions, which swap the CA bit with
+# bit 0 of the register given as argument: sldi moves bit 0 up to bit 63 and
+# clears everything else, then adde adds the register to itself plus CA, so
+# the old bit 0 leaves as the new carry while the old carry becomes the
+# register value (0 or 1).  The two names only document, at the point of use,
+# which chain is being parked and which one is being resumed.
+define(SAVE_BORROW_RESTORE_CARRY,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+define(SAVE_CARRY_RESTORE_BORROW,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+
+# 19991117
+
+# This is just crafted for testing some ideas, and verifying that we can make
+# it run fast. It runs at 2.55 cycles/limb on the 630, which is very good.
+# We should play a little with the schedule. No time has been spent on that.
+
+# To finish this, the loop warm-up and cool-down code needs to be written,
+# and the result needs to be tested. Also, the proper calling sequence should
+# be used.
+
+# r1p r2p s1p s2p n
+# Use reg r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12
+
+ASM_START()
+# mpn_addsub_n (unfinished, see the notes earlier in this file): each loop
+# iteration stores four sum limbs through r3 and four difference limbs
+# through r4, taking the operand limbs from r5 and r6; r7 is the limb count.
+# The add carry and the subtract borrow share the single CA bit: whichever
+# chain is not active is parked in bit 0 of r0 by the SAVE_* macros.
+# NOTE(review): r8-r11 and r14-r17 are consumed before their first loads,
+# bit 0 of r0 is never initialized, and no return value is set -- presumably
+# the missing warm-up/cool-down code is meant to cover this; confirm.
+PROLOGUE(mpn_addsub_n)
+# Save r14-r19 below the stack pointer (r1 itself is not adjusted).
+# NOTE(review): r18 and r19 are saved and restored but not otherwise used.
+ std r14,-64(1)
+ std r15,-56(1)
+ std r16,-48(1)
+ std r17,-40(1)
+ std r18,-32(1)
+ std r19,-24(1)
+
+ srdi r7,r7,2 # size / 4: each iteration handles four limbs
+ mtctr r7 # copy size into CTR
+ addic r0,r0,0 # clear cy
+ addi r3,r3,-8 # offset sum pointer, it's updated before it's used
+ addi r4,r4,-8 # offset difference pointer, it's updated before it's used
+
+.Loop:
+# Sums of limb pairs 1 and 2 (carry chain active).
+ adde r12,r8,r9
+ std r12,8(r3)
+ adde r12,r10,r11
+ std r12,16(r3)
+
+ SAVE_CARRY_RESTORE_BORROW(r0)
+
+# Differences of limb pairs 1..4 (borrow chain active).  subfe rt,ra,rb
+# yields rb - ra, i.e. r9 - r8 here, which is s2 - s1 going by the load
+# comments -- NOTE(review): confirm the intended operand order.
+ subfe r12,r8,r9
+ std r12,8(r4)
+ ld r8,8(r5) # s1 L 1
+ ld r9,8(r6) # s2 L 1
+ subfe r12,r10,r11
+ std r12,16(r4)
+ ld r10,16(r5) # s1 L 2
+ ld r11,16(r6) # s2 L 2
+# pair -------------------------
+ subfe r12,r14,r15
+ std r12,24(r4)
+ subfe r12,r16,r17
+ stdu r12,32(r4) # last difference of this pass, advance pointer by 4 limbs
+
+ SAVE_BORROW_RESTORE_CARRY(r0)
+
+# Sums of limb pairs 3 and 4 (carry chain active again).
+ adde r12,r14,r15
+ std r12,24(r3)
+ ld r14,24(r5) # s1 L 3
+ ld r15,24(r6) # s2 L 3
+ adde r12,r16,r17
+ stdu r12,32(r3) # last sum of this pass, advance pointer by 4 limbs
+ ldu r16,32(r5) # s1 L 4
+ ldu r17,32(r6) # s2 L 4
+ bdnz .Loop
+
+# Restore the registers saved on entry.
+ ld r14,-64(1)
+ ld r15,-56(1)
+ ld r16,-48(1)
+ ld r17,-40(1)
+ ld r18,-32(1)
+ ld r19,-24(1)
+ blr
+EPILOGUE(mpn_addsub_n)
diff --git a/rts/gmp/mpn/powerpc64/aix.m4 b/rts/gmp/mpn/powerpc64/aix.m4
new file mode 100644
index 0000000000..aee9f1f97a
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/aix.m4
@@ -0,0 +1,40 @@
+divert(-1)
+dnl m4 macros for AIX 64-bit assembly.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+dnl ASM_START: emitted once at the top of each assembly file; selects the
+dnl ppc64 instruction set and opens the TOC.
+define(`ASM_START',
+ `.machine "ppc64"
+ .toc')
+
+dnl PROLOGUE(name): exports both name and .name.  name labels a [DS] csect
+dnl holding three doublewords (address of .name, TOC[tc0], 0), which looks
+dnl like the AIX function descriptor -- confirm against the AIX ABI; .name
+dnl labels the code that follows in .text[PR].
+define(`PROLOGUE',
+ `
+ .globl $1
+ .globl .$1
+ .csect $1[DS],3
+$1:
+ .llong .$1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.$1:')
+
+dnl EPILOGUE(name): expands to nothing in this file.
+define(`EPILOGUE', `')
+
+divert
diff --git a/rts/gmp/mpn/powerpc64/copyd.asm b/rts/gmp/mpn/powerpc64/copyd.asm
new file mode 100644
index 0000000000..d06e8c25fd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/copyd.asm
@@ -0,0 +1,45 @@
+# PowerPC-64 mpn_copyd -- Copy a limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr r3
+# sptr r4
+# n r5
+
+include(`../config.m4')
+
+# Unrolling this analogously to sparc64/copyi.s doesn't help for any
+# operand size.
+
+ASM_START()
+# mpn_copyd: copy n limbs from sptr to rptr, starting at the highest
+# address and walking down.  Nothing is copied when n is zero.
+PROLOGUE(mpn_copyd)
+ mtctr r5 # CTR = n, the loop count
+ cmpdi cr0,r5,0 # record in cr0 whether n is zero
+ sldi r5,r5,3 # r5 = n * 8, the vector length in bytes
+ add r3,r3,r5 # rptr now points just past the destination
+ add r4,r4,r5 # sptr now points just past the source
+ beq cr0,.Ldone # n == 0: nothing to do
+.Lcopy: ldu r0,-8(r4) # step sptr down one limb and fetch it
+ stdu r0,-8(r3) # step rptr down one limb and store it
+ bdnz .Lcopy # repeat until CTR reaches zero
+.Ldone: blr
+EPILOGUE(mpn_copyd)
diff --git a/rts/gmp/mpn/powerpc64/copyi.asm b/rts/gmp/mpn/powerpc64/copyi.asm
new file mode 100644
index 0000000000..a1bedc4c5b
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/copyi.asm
@@ -0,0 +1,44 @@
+# PowerPC-64 mpn_copyi -- Copy a limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr r3
+# sptr r4
+# n r5
+
+include(`../config.m4')
+
+# Unrolling this analogously to sparc64/copyi.s doesn't help for any
+# operand size.
+
+ASM_START()
+# mpn_copyi: copy n limbs from sptr to rptr, starting at the lowest
+# address and walking up.  Nothing is copied when n is zero.
+PROLOGUE(mpn_copyi)
+ mtctr r5 # CTR = n, the loop count
+ addi r3,r3,-8 # bias rptr: stdu below pre-increments by 8
+ addi r4,r4,-8 # bias sptr: ldu below pre-increments by 8
+ cmpdi cr0,r5,0 # is n zero?
+ beq cr0,.Ldone # yes: nothing to do
+.Lcopy: ldu r0,8(r4) # step sptr up one limb and fetch it
+ stdu r0,8(r3) # step rptr up one limb and store it
+ bdnz .Lcopy # repeat until CTR reaches zero
+.Ldone: blr
+EPILOGUE(mpn_copyi)
diff --git a/rts/gmp/mpn/powerpc64/gmp-mparam.h b/rts/gmp/mpn/powerpc64/gmp-mparam.h
new file mode 100644
index 0000000000..6fefb960cd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Sizes used on this target: the limb width in bits and in bytes
+   (64 bits == 8 bytes), followed by the widths in bits that the names
+   associate with long, int, short and char.  */
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* Generated by tuneup.c, 2000-07-16. */
+
+/* Every threshold below is wrapped in #ifndef, so a definition made before
+   this header is included takes precedence over the tuned value.  The
+   meaning and units of each threshold are set by the code that consumes it
+   and are not visible in this header.  */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 10
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 57
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 16
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 89
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 28
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 216
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 14
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 6
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 163
+#endif
diff --git a/rts/gmp/mpn/powerpc64/lshift.asm b/rts/gmp/mpn/powerpc64/lshift.asm
new file mode 100644
index 0000000000..cef3a81fdd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/lshift.asm
@@ -0,0 +1,159 @@
+# PowerPC-64 mpn_lshift -- Shift a number left.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+# mpn_lshift: shift the size-limb vector at s1_ptr (r4) left by cnt (r6)
+# bits, store the result at res_ptr (r3), and return in r3 the bits shifted
+# out of the highest-addressed limb (that limb >> (64 - cnt)).
+# Limbs are processed from the highest address downwards.  Up to 20 limbs
+# use a loop unrolled twice (alternating r11/r10 as the current limb, hence
+# the two exits .Lend1/.Lend2); larger operands take the .LBIG path, which
+# is unrolled four times and needs r24-r31 as extra scratch registers.
+PROLOGUE(mpn_lshift)
+ cmpdi cr0,r5,20 # more than 20 limbs?
+ sldi r0,r5,3 # r0 = size in bytes
+ add r4,r4,r0 # make r4 point at end of s1
+ add r7,r3,r0 # make r7 point at end of res
+ bgt .LBIG # branch if more than 20 limbs
+
+ mtctr r5 # copy size into CTR
+ subfic r8,r6,64 # r8 = 64 - cnt, the complementary shift count
+ ldu r11,-8(r4) # load first s1 limb
+ srd r3,r11,r8 # compute function return value
+ bdz .Lend1 # size == 1: only the final store remains
+
+.Loop: ldu r10,-8(r4) # next lower limb
+ sld r9,r11,r6 # high part from the current limb
+ srd r12,r10,r8 # low part from the next lower limb
+ or r9,r9,r12
+ stdu r9,-8(r7)
+ bdz .Lend2 # last limb is in r10
+ ldu r11,-8(r4) # same again with the roles of r10 and r11 swapped
+ sld r9,r10,r6
+ srd r12,r11,r8
+ or r9,r9,r12
+ stdu r9,-8(r7)
+ bdnz .Loop
+
+# Lowest result limb: nothing is shifted in from below.
+.Lend1: sld r0,r11,r6
+ std r0,-8(r7)
+ blr
+.Lend2: sld r0,r10,r6
+ std r0,-8(r7)
+ blr
+
+# Path for more than 20 limbs.  r24-r31 are saved below the stack pointer
+# (r1 itself is not adjusted) and restored before returning.  Throughout,
+# r0 holds the pending high part (previous limb << cnt) of the next store.
+.LBIG:
+ std r24,-64(1)
+ std r25,-56(1)
+ std r26,-48(1)
+ std r27,-40(1)
+ std r28,-32(1)
+ std r29,-24(1)
+ std r30,-16(1)
+ std r31,-8(1)
+ ldu r9,-8(r4) # load first s1 limb
+ subfic r8,r6,64 # r8 = 64 - cnt
+ srd r3,r9,r8 # compute function return value
+ sld r0,r9,r6 # pending high part of the top result limb
+ addi r5,r5,-1 # limbs still to be loaded
+
+# Handle (size - 1) mod 4 limbs one at a time so that a multiple of four
+# remains for the unrolled loop.
+ andi. r10,r5,3 # count for spill loop
+ beq .Le
+ mtctr r10
+ ldu r28,-8(r4)
+ bdz .Lxe0
+
+.Loop0: sld r12,r28,r6
+ srd r24,r28,r8
+ ldu r28,-8(r4)
+ or r24,r0,r24
+ stdu r24,-8(r7)
+ mr r0,r12
+ bdnz .Loop0 # taken at most once!
+
+.Lxe0: sld r12,r28,r6
+ srd r24,r28,r8
+ or r24,r0,r24
+ stdu r24,-8(r7)
+ mr r0,r12
+
+# Preload four limbs; .LoopU then runs one time fewer than there are groups
+# of four, because the last preloaded group is finished after the loop.
+.Le: srdi r5,r5,2 # count for unrolled loop
+ addi r5,r5,-1
+ mtctr r5
+ ld r28,-8(r4)
+ ld r29,-16(r4)
+ ld r30,-24(r4)
+ ldu r31,-32(r4)
+
+# Each pass shifts the four limbs loaded earlier while loading the next four.
+.LoopU: sld r9,r28,r6
+ srd r24,r28,r8
+ ld r28,-8(r4)
+ sld r10,r29,r6
+ srd r25,r29,r8
+ ld r29,-16(r4)
+ sld r11,r30,r6
+ srd r26,r30,r8
+ ld r30,-24(r4)
+ sld r12,r31,r6
+ srd r27,r31,r8
+ ldu r31,-32(r4)
+ or r24,r0,r24
+ std r24,-8(r7)
+ or r25,r9,r25
+ std r25,-16(r7)
+ or r26,r10,r26
+ std r26,-24(r7)
+ or r27,r11,r27
+ stdu r27,-32(r7)
+ mr r0,r12
+ bdnz .LoopU
+
+# Finish the last group of four, which is already in r28-r31 (no more loads).
+ sld r9,r28,r6
+ srd r24,r28,r8
+ sld r10,r29,r6
+ srd r25,r29,r8
+ sld r11,r30,r6
+ srd r26,r30,r8
+ sld r12,r31,r6
+ srd r27,r31,r8
+ or r24,r0,r24
+ std r24,-8(r7)
+ or r25,r9,r25
+ std r25,-16(r7)
+ or r26,r10,r26
+ std r26,-24(r7)
+ or r27,r11,r27
+ stdu r27,-32(r7)
+ mr r0,r12
+
+ std r0,-8(r7) # lowest result limb: nothing is shifted in from below
+ ld r24,-64(1)
+ ld r25,-56(1)
+ ld r26,-48(1)
+ ld r27,-40(1)
+ ld r28,-32(1)
+ ld r29,-24(1)
+ ld r30,-16(1)
+ ld r31,-8(1)
+ blr
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/powerpc64/mul_1.asm b/rts/gmp/mpn/powerpc64/mul_1.asm
new file mode 100644
index 0000000000..47597283ff
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/mul_1.asm
@@ -0,0 +1,49 @@
+# PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+# mpn_mul_1: multiply the size-limb vector at s1_ptr (r4) by s2_limb (r6),
+# store the product limbs at res_ptr (r3), and return the final carry limb
+# in r3.
+# r9 carries the high product limb into the next iteration; the carry-out of
+# each adde stays in CA and is consumed by the next adde (or by the final
+# addze), since none of the instructions in between modify CA.
+PROLOGUE(mpn_mul_1)
+ mtctr 5 # CTR = size
+ li 9,0 # cy_limb = 0
+ addic 0,0,0 # clear CA (adding 0 cannot carry)
+ cal 3,-8(3) # res_ptr -= 8; stdu below pre-increments it (cal is the POWER mnemonic for addi)
+ cal 4,-8(4) # s1_ptr -= 8; ldu below pre-increments it
+.Loop:
+ ldu 0,8(4) # r0 = next s1 limb, advance s1_ptr
+ mulld 7,0,6 # r7 = low 64 bits of s1 limb * s2_limb
+ adde 7,7,9 # add previous cy_limb and the pending CA
+ mulhdu 9,0,6 # r9 = high 64 bits of s1 limb * s2_limb
+ stdu 7,8(3) # store result limb, advance res_ptr
+ bdnz .Loop
+
+ addze 3,9 # return cy_limb plus the last pending CA
+ blr
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/powerpc64/rshift.asm b/rts/gmp/mpn/powerpc64/rshift.asm
new file mode 100644
index 0000000000..88272c7fa9
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/rshift.asm
@@ -0,0 +1,60 @@
+# PowerPC-64 mpn_rshift -- Shift a number right.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+# mpn_rshift: shift the size-limb vector at s1_ptr (r4) right by cnt (r6)
+# bits, store the result at res_ptr (r3), and return in r3 the bits shifted
+# out of the first (lowest-addressed) limb, i.e. that limb << (64 - cnt).
+# Limbs are processed from the lowest address upwards.  The loop is unrolled
+# twice, alternating r11/r10 as the current limb, so it has two exits:
+# .Lend1 (last limb in r11) and .Lend2 (last limb in r10).
+PROLOGUE(mpn_rshift)
+ mtctr r5 # copy size into CTR
+ addi r7,r3,-8 # move adjusted res_ptr to free return reg
+ subfic r8,r6,64 # r8 = 64 - cnt, the complementary shift count
+ ld r11,0(r4) # load first s1 limb
+ sld r3,r11,r8 # compute function return value
+ bdz .Lend1 # size == 1: only the final store remains
+
+.Loop: ldu r10,8(r4) # next higher limb
+ srd r9,r11,r6 # low part from the current limb
+ sld r12,r10,r8 # high part from the next higher limb
+ or r9,r9,r12
+ stdu r9,8(r7)
+ bdz .Lend2 # last limb is in r10
+ ldu r11,8(r4) # same again with the roles of r10 and r11 swapped
+ srd r9,r10,r6
+ sld r12,r11,r8
+ or r9,r9,r12
+ stdu r9,8(r7)
+ bdnz .Loop
+
+# Highest result limb: nothing is shifted in from above.
+.Lend1: srd r0,r11,r6
+ std r0,8(r7)
+ blr
+
+.Lend2: srd r0,r10,r6
+ std r0,8(r7)
+ blr
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/powerpc64/sub_n.asm b/rts/gmp/mpn/powerpc64/sub_n.asm
new file mode 100644
index 0000000000..4de3de69c7
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/sub_n.asm
@@ -0,0 +1,61 @@
+# PowerPC-64 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+# and store difference in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`../config.m4')
+
+ASM_START()
+# mpn_sub_n: subtract the size-limb vector at s2_ptr (r5) from the one at
+# s1_ptr (r4), store the differences at res_ptr (r3) and return the final
+# borrow (0 or 1) in r3.
+# subfe computes rb - ra - (1 - CA), so CA set means "no borrow"; that is
+# why the chain starts with CA set and why the return value is the inverse
+# of the final CA.  The loop is unrolled twice and software-pipelined, with
+# two exits depending on which register pair holds the last limbs
+# (.Lend: r0/r8, .Lexit: r10/r9).  size must be > 0 (see the file header).
+PROLOGUE(mpn_sub_n)
+ mtctr r6 # copy size into CTR
+ addic r0,r6,-1 # set cy (size + -1 carries for any size != 0; r0 is reloaded below)
+ ld r8,0(r4) # load least significant s1 limb
+ ld r0,0(r5) # load least significant s2 limb
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ bdz .Lend # If done (size == 1), skip loop
+.Loop: ld r9,8(r4) # load s1 limb
+ ld r10,8(r5) # load s2 limb
+ subfe r7,r0,r8 # subtract limbs with cy, set cy
+ std r7,8(r3) # store result limb (res_ptr not updated here)
+ bdz .Lexit # decrement CTR and exit if done
+ ldu r8,16(r4) # load s1 limb and update s1_ptr
+ ldu r0,16(r5) # load s2 limb and update s2_ptr
+ subfe r7,r10,r9 # subtract limbs with cy, set cy
+ stdu r7,16(r3) # store result limb and update res_ptr
+ bdnz .Loop # decrement CTR and loop back
+
+# Exit with the last limb pair pending in r0/r8.
+.Lend: subfe r7,r0,r8
+ std r7,8(r3) # store ultimate result limb
+ subfe r3,r0,r0 # load !cy into ... (r3 = cy - 1, i.e. 0 or -1)
+ subfic r3,r3,0 # ... return value register (negate: 0 or 1)
+ blr
+# Exit with the last limb pair pending in r10/r9; res_ptr still lags by one
+# limb because the preceding std did not update it, hence the offset of 16.
+.Lexit: subfe r7,r10,r9
+ std r7,16(r3) # store ultimate result limb
+ subfe r3,r0,r0 # load !cy into ... (r3 = cy - 1, i.e. 0 or -1)
+ subfic r3,r3,0 # ... return value register (negate: 0 or 1)
+ blr
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/powerpc64/submul_1.asm b/rts/gmp/mpn/powerpc64/submul_1.asm
new file mode 100644
index 0000000000..17f6369a38
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/submul_1.asm
@@ -0,0 +1,54 @@
+# PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+# mpn_submul_1: multiply the size-limb vector at s1_ptr (r4) by s2_limb (r6),
+# subtract the product limb by limb from the vector at res_ptr (r3), and
+# return the final carry limb (including the last borrow) in r3.
+# r9 carries the high product limb into the next iteration.  subfc leaves CA
+# set when there was NO borrow, so CA is inverted at the end of each pass;
+# the next adde (or the final addze) then adds 1 exactly when a borrow
+# occurred.  r11 is only a scratch register for that inversion.
+PROLOGUE(mpn_submul_1)
+ mtctr 5 # CTR = size
+ li 9,0 # cy_limb = 0
+ addic 0,0,0 # clear CA (adding 0 cannot carry)
+ cal 3,-8(3) # res_ptr -= 8; stdu below pre-increments it (cal is the POWER mnemonic for addi)
+ cal 4,-8(4) # s1_ptr -= 8; ldu below pre-increments it
+.Loop:
+ ldu 0,8(4) # r0 = next s1 limb, advance s1_ptr
+ ld 10,8(3) # r10 = corresponding res limb
+ mulld 7,0,6 # r7 = low 64 bits of s1 limb * s2_limb
+ adde 7,7,9 # add previous cy_limb and the borrow from the previous pass
+ mulhdu 9,0,6 # r9 = high 64 bits of s1 limb * s2_limb
+ addze 9,9 # fold the carry of the adde into the new cy_limb
+ subfc 7,7,10 # r7 = res limb - r7; CA = 1 if no borrow
+ stdu 7,8(3) # store result limb, advance res_ptr
+ subfe 11,11,11 # invert ... (r11 = CA - 1: 0 or -1, CA unchanged)
+ addic 11,11,1 # ... carry (carries only when r11 was -1, so CA = borrow)
+ bdnz .Loop
+
+ addze 3,9 # return cy_limb plus the last borrow
+ blr
+EPILOGUE(mpn_submul_1)