summaryrefslogtreecommitdiff
path: root/mpn/powerpc32
diff options
context:
space:
mode:
authorTorbjorn Granlund <tege@gmplib.org>2010-05-24 10:21:57 +0200
committerTorbjorn Granlund <tege@gmplib.org>2010-05-24 10:21:57 +0200
commit1f8bd61e70243f225f00129cbf0a1373cf243f22 (patch)
treeebf10fd86ce4b56e08d2f2e3ad174bfaf307351b /mpn/powerpc32
parentf9552adbf2c5cfce893635ca3d405715026de651 (diff)
downloadgmp-1f8bd61e70243f225f00129cbf0a1373cf243f22.tar.gz
New file.
Diffstat (limited to 'mpn/powerpc32')
-rw-r--r--mpn/powerpc32/lshiftc.asm158
1 files changed, 158 insertions, 0 deletions
diff --git a/mpn/powerpc32/lshiftc.asm b/mpn/powerpc32/lshiftc.asm
new file mode 100644
index 000000000..3d7a82a7c
--- /dev/null
+++ b/mpn/powerpc32/lshiftc.asm
@@ -0,0 +1,158 @@
+dnl PowerPC-32 mpn_lshiftc.
+
+dnl Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 3.0
+C 7400,7410 (G4): 3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C cnt r6
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+ cmpwi cr0, r5, 30 C more than 30 limbs?
+ slwi r0, r5, 2
+ add r4, r4, r0 C make r4 point at end of s1
+ add r7, r3, r0 C make r7 point at end of res
+ bgt L(BIG) C branch if more than 12 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8, r6, 32
+ lwzu r11, -4(r4) C load first s1 limb
+ srw r3, r11, r8 C compute function return value
+ bdz L(end1)
+
+L(oop): lwzu r10, -4(r4)
+ slw r9, r11, r6
+ srw r12, r10, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdz L(end2)
+ lwzu r11, -4(r4)
+ slw r9, r10, r6
+ srw r12, r11, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdnz L(oop)
+
+L(end1):
+ slw r0, r11, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+L(end2):
+ slw r0, r10, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+
+L(BIG):
+ stmw r24, -32(r1) C save registers we are supposed to preserve
+ lwzu r9, -4(r4)
+ subfic r8, r6, 32
+ srw r3, r9, r8 C compute function return value
+ slw r0, r9, r6
+ addi r5, r5, -1
+
+ andi. r10, r5, 3 C count for spill loop
+ beq L(e)
+ mtctr r10
+ lwzu r28, -4(r4)
+ bdz L(xe0)
+
+L(loop0):
+ slw r12, r28, r6
+ srw r24, r28, r8
+ lwzu r28, -4(r4)
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+ bdnz L(loop0) C taken at most once!
+
+L(xe0): slw r12, r28, r6
+ srw r24, r28, r8
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+
+L(e): srwi r5, r5, 2 C count for unrolled loop
+ addi r5, r5, -1
+ mtctr r5
+ lwz r28, -4(r4)
+ lwz r29, -8(r4)
+ lwz r30, -12(r4)
+ lwzu r31, -16(r4)
+
+L(loopU):
+ slw r9, r28, r6
+ srw r24, r28, r8
+ lwz r28, -4(r4)
+ slw r10, r29, r6
+ srw r25, r29, r8
+ lwz r29, -8(r4)
+ slw r11, r30, r6
+ srw r26, r30, r8
+ lwz r30, -12(r4)
+ slw r12, r31, r6
+ srw r27, r31, r8
+ lwzu r31, -16(r4)
+ nor r24, r0, r24
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stwu r27, -16(r7)
+ mr r0, r12
+ bdnz L(loopU)
+
+ slw r9, r28, r6
+ srw r24, r28, r8
+ slw r10, r29, r6
+ srw r25, r29, r8
+ slw r11, r30, r6
+ srw r26, r30, r8
+ slw r12, r31, r6
+ srw r27, r31, r8
+ nor r24, r0, r24
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stw r27, -16(r7)
+ nor r12, r12, r12
+ stw r12, -20(r7)
+ lmw r24, -32(r1) C restore registers
+ blr
+EPILOGUE()