diff options
author | Torbjorn Granlund <tg@gmplib.org> | 2017-12-27 00:27:52 +0100 |
---|---|---|
committer | Torbjorn Granlund <tg@gmplib.org> | 2017-12-27 00:27:52 +0100 |
commit | cc63bc777fb2105af7fc978d063f3dfd5c35aa7c (patch) | |
tree | 56d7c5645ea47f2059e8a5ebc3eeb29e5e6f463e /mpn/powerpc64 | |
parent | 39697035a76adb2303f396862c89fba3646c0760 (diff) | |
download | gmp-cc63bc777fb2105af7fc978d063f3dfd5c35aa7c.tar.gz |
Provide POWER9 addmul_1.asm, utilising maddld/maddhdu.
Diffstat (limited to 'mpn/powerpc64')
-rw-r--r-- | mpn/powerpc64/p9/addmul_1.asm | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/mpn/powerpc64/p9/addmul_1.asm b/mpn/powerpc64/p9/addmul_1.asm new file mode 100644 index 000000000..9728d64a7 --- /dev/null +++ b/mpn/powerpc64/p9/addmul_1.asm @@ -0,0 +1,136 @@ +dnl POWER9 mpn_addmul_1. + +dnl Copyright 2017 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C POWER3/PPC630 - +C POWER4/PPC970 - +C POWER5 - +C POWER6 - +C POWER7 - +C POWER8 - +C POWER9 ? + +C TODO +C * Schedule for POWER9 pipeline. +C * Unroll to at least 4x if that proves beneficial. + +C INPUT PARAMETERS +define(`rp', `r3') +define(`up', `r4') +define(`n', `r5') +define(`v0', `r6') + +ASM_START() +PROLOGUE(mpn_addmul_1) + std r31, -8(r1) + + cmpdi cr6, n, 2 + + addi r0, n, -1 + srdi r0, r0, 1 + mtctr r0 + + rldicl. r0, n, 0,63 C r0 = n & 3, set cr0 + bne cr0, L(b1) + +L(b0): ld r10, 0(rp) + ld r12, 0(up) + ld r11, 8(rp) + ld r31, 8(up) + maddld r0, r12, v0, r10 + maddhdu r7, r12, v0, r10 + ble cr6, L(2) + ld r10, 16(rp) + ld r12, 16(up) + maddld r8, r31, v0, r11 + maddhdu r5, r31, v0, r11 + addic up, up, 16 + addi rp, rp, -8 + b L(mid) + +L(b1): ld r11, 0(rp) + ld r31, 0(up) + ble cr6, L(1) + ld r10, 8(rp) + ld r12, 8(up) + maddld r0, r31, v0, r11 + maddhdu r5, r31, v0, r11 + ld r11, 16(rp) + ld r31, 16(up) + maddld r9, r12, v0, r10 + maddhdu r7, r12, v0, r10 + addic up, up, 24 + bdz L(end) + + ALIGN(16) +L(top): ld r10, 24(rp) + ld r12, 0(up) + std r0, 0(rp) + maddld r8, r31, v0, r11 C W:0,2,4 + adde r0, r5, r9 + maddhdu r5, r31, v0, r11 C W:1,3,5 +L(mid): ld r11, 32(rp) + ld r31, 8(up) + std r0, 8(rp) + maddld r9, r12, v0, r10 C W:1,3,5 + adde r0, r7, r8 + maddhdu r7, r12, v0, r10 C W:2,4,6 + addi rp, rp, 16 + addi up, up, 16 + bdnz L(top) + +L(end): std r0, 0(rp) + maddld r8, r31, v0, r11 + adde r0, r5, r9 + maddhdu r5, r31, v0, r11 + std r0, 8(rp) + adde r0, r7, r8 + std r0, 16(rp) + addze r3, r5 + ld r31, -8(r1) + blr + +L(2): maddld r8, r31, v0, r11 + maddhdu r5, r31, v0, r11 + std r0, 0(rp) + addc r0, r7, r8 + std r0, 8(rp) + addze r3, r5 + ld r31, -8(r1) + blr + +L(1): maddld r0, r31, v0, r11 + std r0, 0(rp) + maddhdu r3, r31, v0, r11 + ld r31, -8(r1) + blr +EPILOGUE() |