summary refs log tree commit diff
path: root/mpn
diff options
context:
space:
mode:
author Torbjorn Granlund <tg@gmplib.org> 2021-09-02 21:57:13 +0200
committer Torbjorn Granlund <tg@gmplib.org> 2021-09-02 21:57:13 +0200
commit be85b8fde235fb50afe80290dbb265d549f4ec7f (patch)
tree 8caf2e35fa87afb833ed5326fcf5ea8384c4d0b6 /mpn
parent 251e43ef51b172d3aeec8d595216959ddcf7b089 (diff)
download gmp-be85b8fde235fb50afe80290dbb265d549f4ec7f.tar.gz
Rewrite feed-in code, reducing code size and making it work for v8plus.
Diffstat (limited to 'mpn')
-rw-r--r-- mpn/sparc32/v8/addmul_1.asm 66
-rw-r--r-- mpn/sparc32/v8/mul_1.asm 59
2 files changed, 42 insertions, 83 deletions
diff --git a/mpn/sparc32/v8/addmul_1.asm b/mpn/sparc32/v8/addmul_1.asm
index 005209278..0bf1b247f 100644
--- a/mpn/sparc32/v8/addmul_1.asm
+++ b/mpn/sparc32/v8/addmul_1.asm
@@ -40,50 +40,32 @@ C s2_limb o3
ASM_START()
PROLOGUE(mpn_addmul_1)
+ ld [%o1+0],%o4
+ andcc %o2,1,%g0
+ be L(bx0)
+ andcc %o2,2,%g0
+L(bx1): be L(01)
orcc %g0,%g0,%g2
- ld [%o1+0],%o4 C 1
-
- sll %o2,4,%g1
- and %g1,(4-1)<<4,%g1
-ifdef(`PIC',
-` mov %o7,%g4 C Save return address register
-0: call 1f
- add %o7,L(1)-0b,%g3
-1: mov %g4,%o7 C Restore return address register
-',
-` sethi %hi(L(1)),%g3
- or %g3,%lo(L(1)),%g3
-')
- jmp %g3+%g1
- nop
-L(1):
-L(L00): add %o0,-4,%o0
- b L(loop00) C 4, 8, 12, ...
- add %o1,-4,%o1
- nop
-L(L01): b L(loop01) C 1, 5, 9, ...
- nop
- nop
- nop
-L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
- b L(loop10)
- add %o1,4,%o1
- nop
-L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
- b L(loop11)
+L(b11): add %o0,-8,%o0
+ b L(11)
add %o1,-8,%o1
- nop
+L(bx0): be L(b00)
+ orcc %g0,%g0,%g2
+L(b10): add %o0,-12,%o0
+ b L(10)
+ add %o1,4,%o1
+L(b00): add %o0,-4,%o0
+ b L(00)
+ add %o1,-4,%o1
-L(loop):
- addcc %g3,%g2,%g3 C 1
+L(top): addcc %g3,%g2,%g3 C 1
ld [%o1+4],%o4 C 2
rd %y,%g2 C 1
addx %g0,%g2,%g2
ld [%o0+0],%g1 C 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] C 1
-L(loop00):
- umul %o4,%o3,%g3 C 2
+L(00): umul %o4,%o3,%g3 C 2
ld [%o0+4],%g1 C 2
addxcc %g3,%g2,%g3 C 2
ld [%o1+8],%o4 C 3
@@ -92,8 +74,7 @@ L(loop00):
nop
addcc %g1,%g3,%g3
st %g3,[%o0+4] C 2
-L(loop11):
- umul %o4,%o3,%g3 C 3
+L(11): umul %o4,%o3,%g3 C 3
addxcc %g3,%g2,%g3 C 3
ld [%o1+12],%o4 C 4
rd %y,%g2 C 3
@@ -102,8 +83,7 @@ L(loop11):
ld [%o0+8],%g1 C 2
addcc %g1,%g3,%g3
st %g3,[%o0+8] C 3
-L(loop10):
- umul %o4,%o3,%g3 C 4
+L(10): umul %o4,%o3,%g3 C 4
addxcc %g3,%g2,%g3 C 4
ld [%o1+0],%o4 C 1
rd %y,%g2 C 4
@@ -113,9 +93,8 @@ L(loop10):
st %g3,[%o0+12] C 4
add %o0,16,%o0
addx %g0,%g2,%g2
-L(loop01):
- addcc %o2,-4,%o2
- bg L(loop)
+L(01): addcc %o2,-4,%o2
+ bg L(top)
umul %o4,%o3,%g3 C 1
addcc %g3,%g2,%g3 C 4
@@ -124,8 +103,7 @@ L(loop01):
ld [%o0+0],%g1 C 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] C 4
- addx %g0,%g2,%o0
retl
- nop
+ addx %g0,%g2,%o0
EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc32/v8/mul_1.asm b/mpn/sparc32/v8/mul_1.asm
index e26c853ae..d03a0e6c0 100644
--- a/mpn/sparc32/v8/mul_1.asm
+++ b/mpn/sparc32/v8/mul_1.asm
@@ -40,67 +40,48 @@ C s2_limb o3
ASM_START()
PROLOGUE(mpn_mul_1)
- sll %o2,4,%g1
- and %g1,(4-1)<<4,%g1
-ifdef(`PIC',
-` mov %o7,%g4 C Save return address register
-0: call 1f
- add %o7,L(1)-0b,%g3
-1: mov %g4,%o7 C Restore return address register
-',
-` sethi %hi(L(1)),%g3
- or %g3,%lo(L(1)),%g3
-')
- jmp %g3+%g1
- ld [%o1+0],%o4 C 1
-L(1):
-L(L00): add %o0,-4,%o0
- add %o1,-4,%o1
- b L(loop00) C 4, 8, 12, ...
+ ld [%o1+0],%o4
+ andcc %o2,1,%g0
+ be L(bx0)
+ andcc %o2,2,%g0
+L(bx1): be L(01)
orcc %g0,%g0,%g2
-L(L01): b L(loop01) C 1, 5, 9, ...
- orcc %g0,%g0,%g2
- nop
- nop
-L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
- add %o1,4,%o1
- b L(loop10)
- orcc %g0,%g0,%g2
- nop
-L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
+L(b11): add %o0,-8,%o0
+ b L(11)
add %o1,-8,%o1
- b L(loop11)
+L(bx0): be L(b00)
orcc %g0,%g0,%g2
+L(b10): add %o0,-12,%o0
+ b L(10)
+ add %o1,4,%o1
+L(b00): add %o0,-4,%o0
+ b L(00)
+ add %o1,-4,%o1
-L(loop):
- addcc %g3,%g2,%g3 C 1
+L(top): addcc %g3,%g2,%g3 C 1
ld [%o1+4],%o4 C 2
st %g3,[%o0+0] C 1
rd %y,%g2 C 1
-L(loop00):
- umul %o4,%o3,%g3 C 2
+L(00): umul %o4,%o3,%g3 C 2
addxcc %g3,%g2,%g3 C 2
ld [%o1+8],%o4 C 3
st %g3,[%o0+4] C 2
rd %y,%g2 C 2
-L(loop11):
- umul %o4,%o3,%g3 C 3
+L(11): umul %o4,%o3,%g3 C 3
addxcc %g3,%g2,%g3 C 3
ld [%o1+12],%o4 C 4
add %o1,16,%o1
st %g3,[%o0+8] C 3
rd %y,%g2 C 3
-L(loop10):
- umul %o4,%o3,%g3 C 4
+L(10): umul %o4,%o3,%g3 C 4
addxcc %g3,%g2,%g3 C 4
ld [%o1+0],%o4 C 1
st %g3,[%o0+12] C 4
add %o0,16,%o0
rd %y,%g2 C 4
addx %g0,%g2,%g2
-L(loop01):
- addcc %o2,-4,%o2
- bg L(loop)
+L(01): addcc %o2,-4,%o2
+ bg L(top)
umul %o4,%o3,%g3 C 1
addcc %g3,%g2,%g3 C 4