summaryrefslogtreecommitdiff
path: root/rts/gmp/mpn/powerpc64/addsub_n.asm
diff options
context:
space:
mode:
Diffstat (limited to 'rts/gmp/mpn/powerpc64/addsub_n.asm')
-rw-r--r--rts/gmp/mpn/powerpc64/addsub_n.asm107
1 files changed, 107 insertions, 0 deletions
diff --git a/rts/gmp/mpn/powerpc64/addsub_n.asm b/rts/gmp/mpn/powerpc64/addsub_n.asm
new file mode 100644
index 0000000000..4ed40d71ae
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/addsub_n.asm
@@ -0,0 +1,107 @@
+# PowerPC-64 mpn_addsub_n -- Simultaneous add and sub.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`asm-syntax.m4')
+
+define(SAVE_BORROW_RESTORE_CARRY,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+define(SAVE_CARRY_RESTORE_BORROW,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+
+# 19991117
+
+# This is just crafted for testing some ideas, and verifying that we can make
+# it run fast. It runs at 2.55 cycles/limb on the 630, which is very good.
+# We should play a little with the schedule. No time has been spent on that.
+
+# To finish this, the loop warm up and cool down code needs to be written,
+# and the result need to be tested. Also, the proper calling sequence should
+# be used.
+
+# r1p r2p s1p s2p n
+# Use reg r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12
+
+ASM_START()
+PROLOGUE(mpn_addsub_n)
+ std r14,-64(1)
+ std r15,-56(1)
+ std r16,-48(1)
+ std r17,-40(1)
+ std r18,-32(1)
+ std r19,-24(1)
+
+ srdi r7,r7,2
+ mtctr r7 # copy size into CTR
+ addic r0,r0,0 # clear cy
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ addi r4,r4,-8 # offset res_ptr, it's updated before it's used
+
+.Loop:
+ adde r12,r8,r9
+ std r12,8(r3)
+ adde r12,r10,r11
+ std r12,16(r3)
+
+ SAVE_CARRY_RESTORE_BORROW(r0)
+
+ subfe r12,r8,r9
+ std r12,8(r4)
+ ld r8,8(r5) # s1 L 1
+ ld r9,8(r6) # s2 L 1
+ subfe r12,r10,r11
+ std r12,16(r4)
+ ld r10,16(r5) # s1 L 2
+ ld r11,16(r6) # s2 L 2
+# pair -------------------------
+ subfe r12,r14,r15
+ std r12,24(r4)
+ subfe r12,r16,r17
+ stdu r12,32(r4)
+
+ SAVE_BORROW_RESTORE_CARRY(r0)
+
+ adde r12,r14,r15
+ std r12,24(r3)
+ ld r14,24(r5) # s1 L 3
+ ld r15,24(r6) # s2 L 3
+ adde r12,r16,r17
+ stdu r12,32(r3)
+ ldu r16,32(r5) # s1 L 4
+ ldu r17,32(r6) # s2 L 4
+ bdnz .Loop
+
+ ld r14,-64(1)
+ ld r15,-56(1)
+ ld r16,-48(1)
+ ld r17,-40(1)
+ ld r18,-32(1)
+ ld r19,-24(1)
+ blr
+EPILOGUE(mpn_addsub_n)