1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
dnl x86_64 mpn_bdiv_dbm1c.
dnl Copyright 2008, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
C AMD K8,K9 2.25
C AMD K10 2.25
C Intel P4 12.5
C Intel core2 4
C Intel NHM 3.75
C Intel SBR 3.6
C Intel atom 20
C VIA nano 4
C TODO
C * Optimise feed-in code.
C INPUT PARAMETERS
C Symbolic register names used by the routine below.  qp, up, n_param, bd
C and cy name the registers the incoming arguments are read from; n is a
C working register, not an argument.
C qp: destination pointer; one result limb is stored per source limb.
define(`qp', `%rdi')
C up: source pointer; each limb loaded from here is multiplied by bd.
define(`up', `%rsi')
C n_param: limb count as passed in.  It lives in %rdx, which mul overwrites,
C so the routine copies it elsewhere before the first mul.
define(`n_param', `%rdx')
C bd: the multiplier operand given to every mul.
define(`bd', `%rcx')
C cy: running value that the products are subtracted from; its final value
C is what the routine returns.  The code refers to it directly as %r8.
define(`cy', `%r8')
C n: negated limb index, counted up toward zero by the main loop.
define(`n', `%r9')
C Choose which of IFDOS / IFELF passes its argument through.  When
C HOST_DOS64 is defined, IFDOS expands to its argument and IFELF to nothing;
C otherwise it is the other way round.
ifdef(`HOST_DOS64',`
define(`IFDOS', `$1')
define(`IFELF', `')
',`
define(`IFDOS', `')
define(`IFELF', `$1')
')
C NOTE(review): ABI_SUPPORT, ASM_START, TEXT and ALIGN are not defined in
C this file; presumably they come from the config.m4 include -- confirm there.
C ALIGN(16) presumably places the entry point on a 16-byte boundary.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(ELF64)
ASM_START()
TEXT
ALIGN(16)
C mpn_bdiv_dbm1c (qp, up, n_param, bd, cy)
C
C With h starting as the incoming cy (%r8), for each of the n_param limbs at
C up, lowest address first:
C     p1:p0 = up[i] * bd         (mul leaves p1 in %rdx and p0 in %rax)
C     h     = h - p0             (borrow is left in the carry flag)
C     qp[i] = h
C     h     = h - p1 - borrow
C The final h is returned in %rax.
C
C The loop is unrolled four times; n_param mod 4 selects which of the four
C slots (lo0, lo3, lo2, lo1 in program order) handles limb 0.
C
C Registers written by the instructions visible here: %rax, %rdx, %rsi,
C %rdi, %r8, %r9, %r11 and the flags.
C
C NOTE(review): n_param = 0 is not handled.  The first limb is loaded before
C any test, and the loop counter would step from 0 to 4 without ever being
C zero at the jnz.  Callers presumably guarantee n_param >= 1 -- confirm.
PROLOGUE(mpn_bdiv_dbm1c)
C NOTE(review): DOS64_ENTRY and DOS64_EXIT are defined elsewhere; presumably
C they map the DOS64 argument registers onto the ones named above (4 register
C arguments) and undo that on exit -- confirm against their definitions.
DOS64_ENTRY(4)
C DOS64 builds only: load cy into %r8 from the stack.  Presumably this is
C the slot of the fifth argument at this point -- verify the 56 offset
C against what DOS64_ENTRY does to %rsp.
IFDOS(` mov 56(%rsp), %r8 ')
C Start the first product right away: %rax = up[0].
mov (up), %rax
C n_param is in %rdx, which the mul below overwrites, so take two copies
C first: n for indexing, %r11 for the mod-4 dispatch.  Only the low two bits
C of %r11 are used, so a 32-bit copy is sufficient.
mov n_param, n
mov R32(n_param), R32(%r11)
C %rdx:%rax = up[0] * bd, consumed by whichever slot is entered below.
mul bd
C Advance up and qp to just past their last limbs and negate n, so that
C (up,n,8) and (qp,n,8) address limb 0 and move upward as n rises to zero.
lea (up,n,8), up
lea (qp,n,8), qp
neg n
C %r11 = n_param mod 4.  A multiple of four enters at lo0 with n untouched.
and $3, R32(%r11)
jz L(lo0)
C Otherwise bias n by (n_param mod 4) - 4 so that the displacement used by
C the entry slot lands on limb 0:
C     mod 1 -> lo1 (displacement 24)
C     mod 2 -> lo2 (displacement 16)
C     mod 3 -> lo3 (displacement 8)
lea -4(n,%r11), n
C cmp leaves carry set for 1 and zero set for 2; anything else here is 3.
cmp $2, R32(%r11)
jc L(lo1)
jz L(lo2)
jmp L(lo3)
ALIGN(16)
C Main loop, four limbs per pass.  Each slot label sits after the load and
C mul for its limb, so a slot is entered with the product already in
C %rdx:%rax.  The mov that stores the result does not change the flags, so
C the borrow produced by sub is still in the carry flag when sbb reads it.
L(top): mov (up,n,8), %rax
mul bd
C Slot 0: limb at displacement 0.
L(lo0): sub %rax, %r8
mov %r8, (qp,n,8)
sbb %rdx, %r8
mov 8(up,n,8), %rax
mul bd
C Slot 1: limb at displacement 8.
L(lo3): sub %rax, %r8
mov %r8, 8(qp,n,8)
sbb %rdx, %r8
mov 16(up,n,8), %rax
mul bd
C Slot 2: limb at displacement 16.
L(lo2): sub %rax, %r8
mov %r8, 16(qp,n,8)
sbb %rdx, %r8
mov 24(up,n,8), %rax
mul bd
C Slot 3: limb at displacement 24.
L(lo1): sub %rax, %r8
mov %r8, 24(qp,n,8)
sbb %rdx, %r8
C Step past the four limbs just handled.  n is negative and reaches zero
C exactly when the last limb has been stored.
add $4, n
jnz L(top)
C Return the final running value.
mov %r8, %rax
DOS64_EXIT()
ret
EPILOGUE()
|