diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2013-08-23 16:42:29 +0200 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2013-08-23 16:42:29 +0200 |
commit | 7176b154b6e5eb9d68d60bd1bea4f154901bdb37 (patch) | |
tree | 34a96813ef208c642b63210191efb8fa64b27b5e /mpn/alpha | |
parent | 637103c1932d642f42340d0d242d87835782d0e2 (diff) | |
download | gmp-7176b154b6e5eb9d68d60bd1bea4f154901bdb37.tar.gz |
Replace alpha sqr_diagonal implementations by one sqr_diag_addlsh1.
Diffstat (limited to 'mpn/alpha')
-rw-r--r-- | mpn/alpha/ev6/sqr_diagonal.asm | 115 | ||||
-rw-r--r-- | mpn/alpha/sqr_diag_addlsh1.asm | 82 | ||||
-rw-r--r-- | mpn/alpha/sqr_diagonal.asm | 65 |
3 files changed, 82 insertions, 180 deletions
diff --git a/mpn/alpha/ev6/sqr_diagonal.asm b/mpn/alpha/ev6/sqr_diagonal.asm deleted file mode 100644 index 58d086e62..000000000 --- a/mpn/alpha/ev6/sqr_diagonal.asm +++ /dev/null @@ -1,115 +0,0 @@ -dnl Alpha mpn_sqr_diagonal. - -dnl Copyright 2001, 2002, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 3 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C EV4: ? -C EV5: ? 
-C EV6: 2.3 - -C INPUT PARAMETERS -C rp r16 -C up r17 -C n r18 - - -ASM_START() -PROLOGUE(mpn_sqr_diagonal) C for each limb up[i]: rp[2i] = low half (mulq), rp[2i+1] = high half (umulh) of up[i]^2; two squares are kept in flight in r4/r20 and r5/r21 - lda r18, -2(r18) C n -= 2 - ldq r0, 0(r17) C r0 = up[0] - mulq r0, r0, r4 C r4 = low half of up[0]^2 - umulh r0, r0, r20 C r20 = high half of up[0]^2 - blt r18, L(ex1) C n was 1 - ldq r1, 8(r17) C r1 = up[1] - mulq r1, r1, r5 C r5 = low half of up[1]^2 - umulh r1, r1, r21 C r21 = high half of up[1]^2 - beq r18, L(ex2) C n was 2 - lda r18, -2(r18) C n -= 2 - ldq r0, 16(r17) C r0 = up[2] - blt r18, L(ex3) C n was 3 - ldq r1, 24(r17) C r1 = up[3] - beq r18, L(ex4) C n was 4 - - ALIGN(16) -L(top): lda r18, -2(r18) C n -= 2 - stq r4, 0(r16) C store the pending r4/r20 square ... - mulq r0, r0, r4 C ... while starting the next one in the same registers - stq r20, 8(r16) - umulh r0, r0, r20 - ldq r0, 32(r17) C fetch the limb two ahead of the one just squared - blt r18, L(x) C count went negative (odd n): r0 is the last limb - stq r5, 16(r16) C same store/square overlap for the r5/r21 pair - mulq r1, r1, r5 - stq r21, 24(r16) - umulh r1, r1, r21 - ldq r1, 40(r17) - lda r16, 32(r16) C rp += 4 - lda r17, 16(r17) C up += 2 - bne r18, L(top) C falls through to L(ex4) when the count reaches 0 - - ALIGN(16) -L(ex4): stq r4, 0(r16) C drain: two finished squares pending, r0 and r1 still to square - mulq r0, r0, r4 - stq r20, 8(r16) - umulh r0, r0, r20 - stq r5, 16(r16) - mulq r1, r1, r5 - stq r21, 24(r16) - umulh r1, r1, r21 - stq r4, 32(r16) - stq r20, 40(r16) - stq r5, 48(r16) - stq r21, 56(r16) - ret r31, (r26), 1 - ALIGN(16) -L(x): stq r5, 16(r16) C drain after leaving the loop mid-pass: r5/r21 pending, r1 then r0 still to square - mulq r1, r1, r5 - stq r21, 24(r16) - umulh r1, r1, r21 - stq r4, 32(r16) - mulq r0, r0, r4 - stq r20, 40(r16) - umulh r0, r0, r20 - stq r5, 48(r16) - stq r21, 56(r16) - stq r4, 64(r16) - stq r20, 72(r16) - ret r31, (r26), 1 -L(ex1): stq r4, 0(r16) C n == 1: store the single square - stq r20, 8(r16) - ret r31, (r26), 1 - ALIGN(16) -L(ex2): stq r4, 0(r16) C n == 2: both squares are already computed - stq r20, 8(r16) - stq r5, 16(r16) - stq r21, 24(r16) - ret r31, (r26), 1 - ALIGN(16) -L(ex3): stq r4, 0(r16) C n == 3: two squares computed, up[2] in r0 still to square - mulq r0, r0, r4 - stq r20, 8(r16) - umulh r0, r0, r20 - stq r5, 16(r16) - stq r21, 24(r16) - stq r4, 32(r16) - stq r20, 40(r16) - ret r31, (r26), 1 -EPILOGUE() -ASM_END() diff --git a/mpn/alpha/sqr_diag_addlsh1.asm b/mpn/alpha/sqr_diag_addlsh1.asm new file mode 100644 index 000000000..5d0bd6863 --- /dev/null +++ b/mpn/alpha/sqr_diag_addlsh1.asm @@ -0,0 +1,82 @@ +dnl Alpha mpn_sqr_diag_addlsh1. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C EV4: ? +C EV5: 10.2 +C EV6: 4.5 + +C Ideally, one-way code could run at 9 c/l (limited by mulq+umulh) on ev5 and +C about 3.75 c/l on ev6. Two-way code could run at about 3.25 c/l on ev6. + +C Algorithm: We allow ourselves to propagate carry to a product high word +C without worrying about carry-out, since (B-1)^2 = B^2-2B+1 has a high word of +C B-2, i.e., it will not spill. We propagate carry similarly to a product low word +C since the problem value B-1 is a quadratic non-residue mod B, but our +C products are squares. 
+ +define(`rp', `r16') +define(`tp', `r17') +define(`up', `r18') +define(`n', `r19') + +ASM_START() +PROLOGUE(mpn_sqr_diag_addlsh1) C stores 2n limbs at rp: the two-limb squares up[i]^2 laid end to end, plus the 2n-2 limbs at tp shifted left one bit and added in starting at rp[1] + ldq r0, 0(up) C r0 = up[0] + bis r31, r31, r21 C r21 = 0: bit shifted out of the previous tp limb (none yet) + bis r31, r31, r3 C r3 = 0: carry into the pending high product word + mulq r0, r0, r7 C r7 = low half of up[0]^2 + stq r7, 0(rp) C rp[0] gets no tp contribution + umulh r0, r0, r6 C r6 = high half of up[0]^2, pending + lda n, -1(n) C n -= 1; NOTE(review): the loop below is bottom-tested, so n = 1 would enter it with a zero count -- callers presumably pass n >= 2, confirm + + ALIGN(16) +L(top): ldq r0, 8(up) C r0 = next limb to square + lda up, 8(up) C up += 1 + ldq r8, 0(tp) C r8 = t0 + ldq r20, 8(tp) C r20 = t1 + mulq r0, r0, r7 C r7 = low half of the new square + lda tp, 16(tp) C tp += 2 + sll r8, 1, r23 C r23 = t0 << 1 + srl r8, 63, r22 C r22 = top bit of t0 + or r21, r23, r23 C r23 = (t0 << 1) | bit carried over from the previous tp limb + sll r20, 1, r24 C r24 = t1 << 1 + addq r3, r6, r6 C cannot carry per comment above + or r22, r24, r24 C r24 = (t1 << 1) | top bit of t0 + addq r23, r6, r21 C r21 = shifted t0 + pending high product word + umulh r0, r0, r6 C r6 = high half of the new square, pending for the next pass + cmpult r21, r23, r1 C r1 = carry out of that addition + addq r1, r7, r7 C cannot carry per comment above + stq r21, 8(rp) + addq r24, r7, r22 C r22 = shifted t1 + low half of the new square + stq r22, 16(rp) + lda n, -1(n) C n -= 1 + cmpult r22, r7, r3 C r3 = carry out, consumed by the next addq r3, r6, r6 + srl r20, 63, r21 C r21 = top bit of t1, merged into the next shifted limb + lda rp, 16(rp) C rp += 2 + bne n, L(top) + + addq r3, r6, r6 C cannot carry per comment above + addq r21, r6, r21 C add the bit shifted out of the last tp limb + stq r21, 8(rp) C final (most significant) result limb + ret r31, (r26), 1 +EPILOGUE() +ASM_END() diff --git a/mpn/alpha/sqr_diagonal.asm b/mpn/alpha/sqr_diagonal.asm deleted file mode 100644 index 2aa7f2e59..000000000 --- a/mpn/alpha/sqr_diagonal.asm +++ /dev/null @@ -1,65 +0,0 @@ -dnl Alpha mpn_sqr_diagonal. - -dnl Copyright 2001, 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. - -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published -dnl by the Free Software Foundation; either version 3 of the License, or (at -dnl your option) any later version. - -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -dnl License for more details. - -dnl You should have received a copy of the GNU Lesser General Public License -dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
- -include(`../config.m4') - -C cycles/limb -C EV4: 42 -C EV5: 18 -C EV6: 3.45 - -C INPUT PARAMETERS -C rp r16 -C up r17 -C n r18 - - -ASM_START() -PROLOGUE(mpn_sqr_diagonal) C for each limb up[i]: rp[2i] = low half, rp[2i+1] = high half of up[i]^2; one square is kept pending in r3/r4 - ldq r2,0(r17) C r2 = s1_limb - lda r18,-2(r18) C size -= 2 - mulq r2,r2,r3 C r3 = prod_low - umulh r2,r2,r4 C r4 = prod_high - blt r18,$Lend1 C jump if size was == 1 - ldq r2,8(r17) C r2 = s1_limb - beq r18,$Lend2 C jump if size was == 2 - - ALIGN(8) -$Loop: stq r3,0(r16) C store pending prod_low - mulq r2,r2,r3 C r3 = prod_low - lda r18,-1(r18) C size-- - stq r4,8(r16) C store pending prod_high - umulh r2,r2,r4 C r4 = prod_high - ldq r2,16(r17) C r2 = s1_limb - lda r17,8(r17) C s1_ptr++ - lda r16,16(r16) C res_ptr += 2 limbs - bne r18,$Loop - -$Lend2: stq r3,0(r16) C store the pending square, then square the last limb - mulq r2,r2,r3 C r3 = prod_low - stq r4,8(r16) - umulh r2,r2,r4 C r4 = prod_high - stq r3,16(r16) - stq r4,24(r16) - ret r31,(r26),1 -$Lend1: stq r3,0(r16) C size == 1: store the single square - stq r4,8(r16) - ret r31,(r26),1 -EPILOGUE(mpn_sqr_diagonal) -ASM_END() |