diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2010-05-24 10:21:57 +0200 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2010-05-24 10:21:57 +0200 |
commit | 1f8bd61e70243f225f00129cbf0a1373cf243f22 (patch) | |
tree | ebf10fd86ce4b56e08d2f2e3ad174bfaf307351b /mpn/powerpc32 | |
parent | f9552adbf2c5cfce893635ca3d405715026de651 (diff) | |
download | gmp-1f8bd61e70243f225f00129cbf0a1373cf243f22.tar.gz |
New file.
Diffstat (limited to 'mpn/powerpc32')
-rw-r--r-- | mpn/powerpc32/lshiftc.asm | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/mpn/powerpc32/lshiftc.asm b/mpn/powerpc32/lshiftc.asm
new file mode 100644
index 000000000..3d7a82a7c
--- /dev/null
+++ b/mpn/powerpc32/lshiftc.asm
@@ -0,0 +1,158 @@
+dnl PowerPC-32 mpn_lshiftc.
+
+dnl Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 3.0
+C 7400,7410 (G4): 3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C cnt r6
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+ cmpwi cr0, r5, 30 C more than 30 limbs?
+ slwi r0, r5, 2
+ add r4, r4, r0 C make r4 point at end of s1
+ add r7, r3, r0 C make r7 point at end of res
+ bgt L(BIG) C branch if more than 30 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8, r6, 32
+ lwzu r11, -4(r4) C load first s1 limb
+ srw r3, r11, r8 C compute function return value
+ bdz L(end1)
+
+L(oop): lwzu r10, -4(r4)
+ slw r9, r11, r6
+ srw r12, r10, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdz L(end2)
+ lwzu r11, -4(r4)
+ slw r9, r10, r6
+ srw r12, r11, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdnz L(oop)
+
+L(end1):
+ slw r0, r11, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+L(end2):
+ slw r0, r10, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+
+L(BIG):
+ stmw r24, -32(r1) C save registers we are supposed to preserve
+ lwzu r9, -4(r4)
+ subfic r8, r6, 32
+ srw r3, r9, r8 C compute function return value
+ slw r0, r9, r6
+ addi r5, r5, -1
+
+ andi. r10, r5, 3 C count for spill loop
+ beq L(e)
+ mtctr r10
+ lwzu r28, -4(r4)
+ bdz L(xe0)
+
+L(loop0):
+ slw r12, r28, r6
+ srw r24, r28, r8
+ lwzu r28, -4(r4)
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+ bdnz L(loop0) C taken at most once!
+
+L(xe0): slw r12, r28, r6
+ srw r24, r28, r8
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+
+L(e): srwi r5, r5, 2 C count for unrolled loop
+ addi r5, r5, -1
+ mtctr r5
+ lwz r28, -4(r4)
+ lwz r29, -8(r4)
+ lwz r30, -12(r4)
+ lwzu r31, -16(r4)
+
+L(loopU):
+ slw r9, r28, r6
+ srw r24, r28, r8
+ lwz r28, -4(r4)
+ slw r10, r29, r6
+ srw r25, r29, r8
+ lwz r29, -8(r4)
+ slw r11, r30, r6
+ srw r26, r30, r8
+ lwz r30, -12(r4)
+ slw r12, r31, r6
+ srw r27, r31, r8
+ lwzu r31, -16(r4)
+ nor r24, r0, r24
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stwu r27, -16(r7)
+ mr r0, r12
+ bdnz L(loopU)
+
+ slw r9, r28, r6
+ srw r24, r28, r8
+ slw r10, r29, r6
+ srw r25, r29, r8
+ slw r11, r30, r6
+ srw r26, r30, r8
+ slw r12, r31, r6
+ srw r27, r31, r8
+ nor r24, r0, r24
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stw r27, -16(r7)
+ nor r12, r12, r12
+ stw r12, -20(r7)
+ lmw r24, -32(r1) C restore registers
+ blr
+EPILOGUE()