diff options
author | Torbjorn Granlund <tg@gmplib.org> | 2021-09-02 21:57:13 +0200 |
---|---|---|
committer | Torbjorn Granlund <tg@gmplib.org> | 2021-09-02 21:57:13 +0200 |
commit | be85b8fde235fb50afe80290dbb265d549f4ec7f (patch) | |
tree | 8caf2e35fa87afb833ed5326fcf5ea8384c4d0b6 /mpn | |
parent | 251e43ef51b172d3aeec8d595216959ddcf7b089 (diff) | |
download | gmp-be85b8fde235fb50afe80290dbb265d549f4ec7f.tar.gz |
Rewrite feed-in code, reducing code size and making it work for v8plus.
Diffstat (limited to 'mpn')
-rw-r--r-- | mpn/sparc32/v8/addmul_1.asm | 66 | ||||
-rw-r--r-- | mpn/sparc32/v8/mul_1.asm | 59 |
2 files changed, 42 insertions, 83 deletions
diff --git a/mpn/sparc32/v8/addmul_1.asm b/mpn/sparc32/v8/addmul_1.asm index 005209278..0bf1b247f 100644 --- a/mpn/sparc32/v8/addmul_1.asm +++ b/mpn/sparc32/v8/addmul_1.asm @@ -40,50 +40,32 @@ C s2_limb o3 ASM_START() PROLOGUE(mpn_addmul_1) + ld [%o1+0],%o4 + andcc %o2,1,%g0 + be L(bx0) + andcc %o2,2,%g0 +L(bx1): be L(01) orcc %g0,%g0,%g2 - ld [%o1+0],%o4 C 1 - - sll %o2,4,%g1 - and %g1,(4-1)<<4,%g1 -ifdef(`PIC', -` mov %o7,%g4 C Save return address register -0: call 1f - add %o7,L(1)-0b,%g3 -1: mov %g4,%o7 C Restore return address register -', -` sethi %hi(L(1)),%g3 - or %g3,%lo(L(1)),%g3 -') - jmp %g3+%g1 - nop -L(1): -L(L00): add %o0,-4,%o0 - b L(loop00) C 4, 8, 12, ... - add %o1,-4,%o1 - nop -L(L01): b L(loop01) C 1, 5, 9, ... - nop - nop - nop -L(L10): add %o0,-12,%o0 C 2, 6, 10, ... - b L(loop10) - add %o1,4,%o1 - nop -L(L11): add %o0,-8,%o0 C 3, 7, 11, ... - b L(loop11) +L(b11): add %o0,-8,%o0 + b L(11) add %o1,-8,%o1 - nop +L(bx0): be L(b00) + orcc %g0,%g0,%g2 +L(b10): add %o0,-12,%o0 + b L(10) + add %o1,4,%o1 +L(b00): add %o0,-4,%o0 + b L(00) + add %o1,-4,%o1 -L(loop): - addcc %g3,%g2,%g3 C 1 +L(top): addcc %g3,%g2,%g3 C 1 ld [%o1+4],%o4 C 2 rd %y,%g2 C 1 addx %g0,%g2,%g2 ld [%o0+0],%g1 C 2 addcc %g1,%g3,%g3 st %g3,[%o0+0] C 1 -L(loop00): - umul %o4,%o3,%g3 C 2 +L(00): umul %o4,%o3,%g3 C 2 ld [%o0+4],%g1 C 2 addxcc %g3,%g2,%g3 C 2 ld [%o1+8],%o4 C 3 @@ -92,8 +74,7 @@ L(loop00): nop addcc %g1,%g3,%g3 st %g3,[%o0+4] C 2 -L(loop11): - umul %o4,%o3,%g3 C 3 +L(11): umul %o4,%o3,%g3 C 3 addxcc %g3,%g2,%g3 C 3 ld [%o1+12],%o4 C 4 rd %y,%g2 C 3 @@ -102,8 +83,7 @@ L(loop11): ld [%o0+8],%g1 C 2 addcc %g1,%g3,%g3 st %g3,[%o0+8] C 3 -L(loop10): - umul %o4,%o3,%g3 C 4 +L(10): umul %o4,%o3,%g3 C 4 addxcc %g3,%g2,%g3 C 4 ld [%o1+0],%o4 C 1 rd %y,%g2 C 4 @@ -113,9 +93,8 @@ L(loop10): st %g3,[%o0+12] C 4 add %o0,16,%o0 addx %g0,%g2,%g2 -L(loop01): - addcc %o2,-4,%o2 - bg L(loop) +L(01): addcc %o2,-4,%o2 + bg L(top) umul %o4,%o3,%g3 C 1 addcc %g3,%g2,%g3 C 4 @@ -124,8 +103,7 @@ L(loop01): ld [%o0+0],%g1 C 2 addcc %g1,%g3,%g3 st %g3,[%o0+0] C 4 - addx %g0,%g2,%o0 retl - nop + addx %g0,%g2,%o0 EPILOGUE(mpn_addmul_1) diff --git a/mpn/sparc32/v8/mul_1.asm b/mpn/sparc32/v8/mul_1.asm index e26c853ae..d03a0e6c0 100644 --- a/mpn/sparc32/v8/mul_1.asm +++ b/mpn/sparc32/v8/mul_1.asm @@ -40,67 +40,48 @@ C s2_limb o3 ASM_START() PROLOGUE(mpn_mul_1) - sll %o2,4,%g1 - and %g1,(4-1)<<4,%g1 -ifdef(`PIC', -` mov %o7,%g4 C Save return address register -0: call 1f - add %o7,L(1)-0b,%g3 -1: mov %g4,%o7 C Restore return address register -', -` sethi %hi(L(1)),%g3 - or %g3,%lo(L(1)),%g3 -') - jmp %g3+%g1 - ld [%o1+0],%o4 C 1 -L(1): -L(L00): add %o0,-4,%o0 - add %o1,-4,%o1 - b L(loop00) C 4, 8, 12, ... + ld [%o1+0],%o4 + andcc %o2,1,%g0 + be L(bx0) + andcc %o2,2,%g0 +L(bx1): be L(01) orcc %g0,%g0,%g2 -L(L01): b L(loop01) C 1, 5, 9, ... - orcc %g0,%g0,%g2 - nop - nop -L(L10): add %o0,-12,%o0 C 2, 6, 10, ... - add %o1,4,%o1 - b L(loop10) - orcc %g0,%g0,%g2 - nop -L(L11): add %o0,-8,%o0 C 3, 7, 11, ... +L(b11): add %o0,-8,%o0 + b L(11) add %o1,-8,%o1 - b L(loop11) +L(bx0): be L(b00) orcc %g0,%g0,%g2 +L(b10): add %o0,-12,%o0 + b L(10) + add %o1,4,%o1 +L(b00): add %o0,-4,%o0 + b L(00) + add %o1,-4,%o1 -L(loop): - addcc %g3,%g2,%g3 C 1 +L(top): addcc %g3,%g2,%g3 C 1 ld [%o1+4],%o4 C 2 st %g3,[%o0+0] C 1 rd %y,%g2 C 1 -L(loop00): - umul %o4,%o3,%g3 C 2 +L(00): umul %o4,%o3,%g3 C 2 addxcc %g3,%g2,%g3 C 2 ld [%o1+8],%o4 C 3 st %g3,[%o0+4] C 2 rd %y,%g2 C 2 -L(loop11): - umul %o4,%o3,%g3 C 3 +L(11): umul %o4,%o3,%g3 C 3 addxcc %g3,%g2,%g3 C 3 ld [%o1+12],%o4 C 4 add %o1,16,%o1 st %g3,[%o0+8] C 3 rd %y,%g2 C 3 -L(loop10): - umul %o4,%o3,%g3 C 4 +L(10): umul %o4,%o3,%g3 C 4 addxcc %g3,%g2,%g3 C 4 ld [%o1+0],%o4 C 1 st %g3,[%o0+12] C 4 add %o0,16,%o0 rd %y,%g2 C 4 addx %g0,%g2,%g2 -L(loop01): - addcc %o2,-4,%o2 - bg L(loop) +L(01): addcc %o2,-4,%o2 + bg L(top) umul %o4,%o3,%g3 C 1 addcc %g3,%g2,%g3 C 4 |