summaryrefslogtreecommitdiff
path: root/mpn/alpha
diff options
context:
space:
mode:
authorTorbjorn Granlund <tege@gmplib.org>2013-08-23 16:42:29 +0200
committerTorbjorn Granlund <tege@gmplib.org>2013-08-23 16:42:29 +0200
commit7176b154b6e5eb9d68d60bd1bea4f154901bdb37 (patch)
tree34a96813ef208c642b63210191efb8fa64b27b5e /mpn/alpha
parent637103c1932d642f42340d0d242d87835782d0e2 (diff)
downloadgmp-7176b154b6e5eb9d68d60bd1bea4f154901bdb37.tar.gz
Replace alpha sqr_diagonal implementations by one sqr_diag_addlsh1.
Diffstat (limited to 'mpn/alpha')
-rw-r--r--mpn/alpha/ev6/sqr_diagonal.asm115
-rw-r--r--mpn/alpha/sqr_diag_addlsh1.asm82
-rw-r--r--mpn/alpha/sqr_diagonal.asm65
3 files changed, 82 insertions, 180 deletions
diff --git a/mpn/alpha/ev6/sqr_diagonal.asm b/mpn/alpha/ev6/sqr_diagonal.asm
deleted file mode 100644
index 58d086e62..000000000
--- a/mpn/alpha/ev6/sqr_diagonal.asm
+++ /dev/null
@@ -1,115 +0,0 @@
-dnl Alpha mpn_sqr_diagonal.
-
-dnl Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C EV4: ?
-C EV5: ?
-C EV6: 2.3
-
-C INPUT PARAMETERS
-C rp r16
-C up r17
-C n r18
-
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
- lda r18, -2(r18) C n -= 2
- ldq r0, 0(r17)
- mulq r0, r0, r4
- umulh r0, r0, r20
- blt r18, L(ex1)
- ldq r1, 8(r17)
- mulq r1, r1, r5
- umulh r1, r1, r21
- beq r18, L(ex2)
- lda r18, -2(r18) C n -= 2
- ldq r0, 16(r17)
- blt r18, L(ex3)
- ldq r1, 24(r17)
- beq r18, L(ex4)
-
- ALIGN(16)
-L(top): lda r18, -2(r18) C n -= 2
- stq r4, 0(r16)
- mulq r0, r0, r4
- stq r20, 8(r16)
- umulh r0, r0, r20
- ldq r0, 32(r17)
- blt r18, L(x)
- stq r5, 16(r16)
- mulq r1, r1, r5
- stq r21, 24(r16)
- umulh r1, r1, r21
- ldq r1, 40(r17)
- lda r16, 32(r16) C rp += 4
- lda r17, 16(r17) C up += 2
- bne r18, L(top)
-
- ALIGN(16)
-L(ex4): stq r4, 0(r16)
- mulq r0, r0, r4
- stq r20, 8(r16)
- umulh r0, r0, r20
- stq r5, 16(r16)
- mulq r1, r1, r5
- stq r21, 24(r16)
- umulh r1, r1, r21
- stq r4, 32(r16)
- stq r20, 40(r16)
- stq r5, 48(r16)
- stq r21, 56(r16)
- ret r31, (r26), 1
- ALIGN(16)
-L(x): stq r5, 16(r16)
- mulq r1, r1, r5
- stq r21, 24(r16)
- umulh r1, r1, r21
- stq r4, 32(r16)
- mulq r0, r0, r4
- stq r20, 40(r16)
- umulh r0, r0, r20
- stq r5, 48(r16)
- stq r21, 56(r16)
- stq r4, 64(r16)
- stq r20, 72(r16)
- ret r31, (r26), 1
-L(ex1): stq r4, 0(r16)
- stq r20, 8(r16)
- ret r31, (r26), 1
- ALIGN(16)
-L(ex2): stq r4, 0(r16)
- stq r20, 8(r16)
- stq r5, 16(r16)
- stq r21, 24(r16)
- ret r31, (r26), 1
- ALIGN(16)
-L(ex3): stq r4, 0(r16)
- mulq r0, r0, r4
- stq r20, 8(r16)
- umulh r0, r0, r20
- stq r5, 16(r16)
- stq r21, 24(r16)
- stq r4, 32(r16)
- stq r20, 40(r16)
- ret r31, (r26), 1
-EPILOGUE()
-ASM_END()
diff --git a/mpn/alpha/sqr_diag_addlsh1.asm b/mpn/alpha/sqr_diag_addlsh1.asm
new file mode 100644
index 000000000..5d0bd6863
--- /dev/null
+++ b/mpn/alpha/sqr_diag_addlsh1.asm
@@ -0,0 +1,82 @@
+dnl Alpha mpn_sqr_diag_addlsh1.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: ?
+C EV5: 10.2
+C EV6: 4.5
+
+C Ideally, one-way code could run at 9 c/l (limited by mulq+umulh) on ev5 and
+C about 3.75 c/l on ev6. Two-way code could run at about 3.25 c/l on ev6.
+
+C Algorithm: We allow ourselves to propagate a carry into a product high word
+C without worrying about carry-out, since (B-1)^2 = B^2-2B+1 has a high word
+C of B-2, i.e., it cannot spill. We likewise propagate a carry into a product
+C low word: the only problem value, B-1, is a quadratic non-residue mod B,
+C whereas our product low words are squares mod B.
+
+define(`rp', `r16')
+define(`tp', `r17')
+define(`up', `r18')
+define(`n', `r19')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diag_addlsh1) C writes 2n limbs at rp: squares of the n limbs at up, plus twice the 2n-2 limbs at tp offset by one limb
+ ldq r0, 0(up) C r0 = up[0]
+ bis r31, r31, r21 C r21 = 0: bit shifted out of the previous tp limb (none yet)
+ bis r31, r31, r3 C r3 = 0: carry pending into the square high word (none yet)
+ mulq r0, r0, r7 C r7 = low word of up[0]^2
+ stq r7, 0(rp) C rp[0] is stored as is, nothing from tp is added to it
+ umulh r0, r0, r6 C r6 = high word of up[0]^2, consumed by the first loop pass
+ lda n, -1(n) C n-1 loop passes follow; the loop is entered before n is tested, so n >= 2 is needed
+
+ ALIGN(16)
+L(top): ldq r0, 8(up) C r0 = next limb of up
+ lda up, 8(up) C up += 1 limb
+ ldq r8, 0(tp) C r8 = tp[0]
+ ldq r20, 8(tp) C r20 = tp[1]
+ mulq r0, r0, r7 C r7 = low word of the new square
+ lda tp, 16(tp) C tp += 2 limbs
+ sll r8, 1, r23 C r23 = tp[0] << 1
+ srl r8, 63, r22 C r22 = top bit of tp[0]
+ or r21, r23, r23 C r23 |= bit shifted out of the previous tp limb
+ sll r20, 1, r24 C r24 = tp[1] << 1
+ addq r3, r6, r6 C cannot carry per comment above
+ or r22, r24, r24 C r24 |= top bit of tp[0]
+ addq r23, r6, r21 C r21 = doubled tp[0] + high word of the previous square
+ umulh r0, r0, r6 C r6 = high word of the new square (old r6 was consumed just above)
+ cmpult r21, r23, r1 C r1 = carry out of that addition
+ addq r1, r7, r7 C cannot carry per comment above
+ stq r21, 8(rp) C rp[1] = r21
+ addq r24, r7, r22 C r22 = doubled tp[1] + low word of the new square
+ stq r22, 16(rp) C rp[2] = r22
+ lda n, -1(n) C one more pass done
+ cmpult r22, r7, r3 C r3 = carry out, added to r6 in the next pass or after the loop
+ srl r20, 63, r21 C r21 = top bit of tp[1], merged in the next pass or after the loop
+ lda rp, 16(rp) C rp += 2 limbs
+ bne n, L(top) C repeat until n reaches 0
+
+ addq r3, r6, r6 C cannot carry per comment above
+ addq r21, r6, r21 C add the last bit shifted out of tp
+ stq r21, 8(rp) C store the final, highest-addressed result limb
+ ret r31, (r26), 1 C return through r26
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/sqr_diagonal.asm b/mpn/alpha/sqr_diagonal.asm
deleted file mode 100644
index 2aa7f2e59..000000000
--- a/mpn/alpha/sqr_diagonal.asm
+++ /dev/null
@@ -1,65 +0,0 @@
-dnl Alpha mpn_sqr_diagonal.
-
-dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C EV4: 42
-C EV5: 18
-C EV6: 3.45
-
-C INPUT PARAMETERS
-C rp r16
-C up r17
-C n r18
-
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
- ldq r2,0(r17) C r2 = s1_limb
- lda r18,-2(r18) C size -= 2
- mulq r2,r2,r3 C r3 = prod_low
- umulh r2,r2,r4 C r4 = prod_high
- blt r18,$Lend1 C jump if size was == 1
- ldq r2,8(r17) C r2 = s1_limb
- beq r18,$Lend2 C jump if size was == 2
-
- ALIGN(8)
-$Loop: stq r3,0(r16)
- mulq r2,r2,r3 C r3 = prod_low
- lda r18,-1(r18) C size--
- stq r4,8(r16)
- umulh r2,r2,r4 C r4 = cy_limb
- ldq r2,16(r17) C r2 = s1_limb
- lda r17,8(r17) C s1_ptr++
- lda r16,16(r16) C res_ptr++
- bne r18,$Loop
-
-$Lend2: stq r3,0(r16)
- mulq r2,r2,r3 C r3 = prod_low
- stq r4,8(r16)
- umulh r2,r2,r4 C r4 = cy_limb
- stq r3,16(r16)
- stq r4,24(r16)
- ret r31,(r26),1
-$Lend1: stq r3,0(r16)
- stq r4,8(r16)
- ret r31,(r26),1
-EPILOGUE(mpn_sqr_diagonal)
-ASM_END()