1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
dnl PowerPC-32 mpn_tabselect.
dnl Copyright 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
C 603e: ?
C 604e: ?
C 75x (G3): ?
C 7400,7410 (G4): ?
C 744x,745x (G4+): ?
C power4/ppc970: 3.3
C power5: ?
C NOTES
C * This has not been tuned for any specific processor. Its speed should not
C be too bad, though.
C * Using VMX could result in significant speedup for certain CPUs.
C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
define(`rp', `r3')
define(`tp', `r4')
define(`n', `r5')
define(`nents', `r6')
define(`which', `r7')
define(`mask', `r8')
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_tabselect)
addi r0, n, 1
srdi r0, r0, 1 C inner loop count
andi. r9, n, 1 C set cr0 for use in inner loop
subf which, nents, which
sldi n, n, 2
L(outer):
mtctr r0 C put inner loop count in ctr
add r9, which, nents C are we at the selected table entry?
addic r9, r9, -1 C set CF iff not selected entry
subfe mask, r0, r0
beq cr0, L(top) C branch to loop entry if n even
lwz r9, 0(tp)
addi tp, tp, 4
and r9, r9, mask
lwz r11, 0(rp)
andc r11, r11, mask
or r9, r9, r11
stw r9, 0(rp)
addi rp, rp, 4
bdz L(end)
ALIGN(16)
L(top): lwz r9, 0(tp)
lwz r10, 4(tp)
addi tp, tp, 8
nop
and r9, r9, mask
and r10, r10, mask
lwz r11, 0(rp)
lwz r12, 4(rp)
andc r11, r11, mask
andc r12, r12, mask
or r9, r9, r11
or r10, r10, r12
stw r9, 0(rp)
stw r10, 4(rp)
addi rp, rp, 8
bdnz L(top)
L(end): subf rp, n, rp C move rp back to beginning
cmpdi cr6, nents, 1
addi nents, nents, -1
bne cr6, L(outer)
blr
EPILOGUE()
|