summaryrefslogtreecommitdiff
path: root/mpn/powerpc64/mode64/invert_limb.asm
blob: ff3e2ba8d2f664af386af771c8779f389a60e9fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.

dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C		cycles/limb
C POWER3/PPC630:     ?
C POWER4/PPC970:     75 (including call+ret)

C This was generated by GCC, then roughly hand optimized.  There is probably
C more optimization headroom.

ASM_START()
C mpn_invert_limb
C
C In:   r3 = d, the limb to invert.  The file header calls d normalized;
C       the table lookup below uses only bits 55..62 of d as its index.
C Out:  r3 = the computed inverse limb.
C Uses: the visible instructions write r0, r8, r9, r10, r11, r12 and the
C       carry bit (CA).  No loads other than the table lookup, no stores.
C       NOTE(review): whatever the LEA macro expands to is not visible
C       here -- confirm it touches nothing beyond r12.
C
C Method, as read from the code: an initial estimate x is loaded from
C approx_tab, then refined three times.  Each refinement computes a shifted
C copy of x minus a scaled x*x*d, using wider products each time (64-bit
C low product, 64-bit high product, then a 128-bit square times d).  The
C last group of instructions derives the returned value from the product
C d*x and the carries of two follow-up additions.
PROLOGUE(mpn_invert_limb)
	LEA(	r12, approx_tab)	C r12 = address of approx_tab

	srdi	r11, r3, 32		C r11 = d >> 32
	rlwinm  r9, r11, 10, 23, 30	C r9 = ((d >> 55) & 0xff) << 1
	lhzx	r0, r12, r9		C load initial approximation
	rldic	r10, r0, 6, 42		C r10 = x = table entry << 6
	mulld	r8, r10, r10		C r8 = x * x
	sldi	r9, r10, 17		C r9 = x << 17
	mulld	r0, r8, r11		C r0 = x * x * (d >> 32), low 64 bits
	srdi	r0, r0, 31		C r0 >>= 31
	subf	r10, r0, r9		C r10 = r9 - r0, first refined x
	mulld	r8, r10, r10		C r8 = x * x, low 64 bits
	sldi	r11, r10, 33		C r11 = x << 33
	mulhdu	r0, r8, r3		C r0 = high 64 bits of r8 * d
	sldi	r9, r0, 1		C r9 = 2 * r0
	subf	r10, r9, r11		C r10 = r11 - r9, second refined x
	sldi	r11, r10, 2		C r11 = x << 2
	mulhdu	r0, r10, r10		C r0 = high word of x * x
	mulld	r8, r10, r10		C r8 = low word of x * x
	mulhdu	r10, r8, r3		C r10 = high word of r8 * d
	mulld	r9, r0, r3		C r9 = low word of r0 * d
	mulhdu	r0, r0, r3		C r0 = high word of r0 * d
	addc	r8, r9, r10		C r8 = r9 + r10, carry out in CA
	addze	r10, r0			C r10 = r0 + CA; r10:r8 = upper 128 bits of x*x*d
	srdi	r0, r8, 62		C r0 = top 2 bits of r8
	rldimi	r0, r10, 2, 0		C r0 = (r10 << 2) | (r8 >> 62)
	sldi	r9, r8, 2		C r9 = r8 << 2; r0:r9 = r10:r8 shifted left 2
	subfic	r10, r9, 0		C 0 - r9; only the resulting CA (borrow) is used
	subfe	r8, r0, r11		C r8 = r11 - r0 - borrow, third refined x
	mulhdu	r10, r3, r8		C r10 = high word of d * x
	add	r10, r10, r3		C r10 += d
	mulld	r9, r3, r8		C r9 = low word of d * x
	subf	r11, r10, r8		C r11 = x - r10
	addi	r0, r10, 1		C r0 = r10 + 1
	addi	r8, r11, -1		C r8 = x - r10 - 1
	and	r0, r3, r0		C r0 = d & (r10 + 1)
	addc	r11, r9, r0		C r11 = r9 + r0, carry out in CA
	addze	r10, r10		C r10 += CA
	addc	r0, r11, r3		C r11 + d; only the resulting CA is used
	addze	r10, r10		C r10 += CA
	subf	r3, r10, r8		C return value r3 = r8 - r10
	blr
EPILOGUE()

C approx_tab: 256 halfword entries (32 rows of 8) giving the initial
C approximation loaded by mpn_invert_limb.  That code indexes the table
C with a byte offset of 2 * ((d >> 55) & 0xff), i.e. entry i is selected
C by bits 55..62 of d.  Values decrease from 1023 (entry 0) to 513
C (entry 255).
C NOTE(review): the entries appear to be about 2^18 / (256 + i), with
C entry 0 held at 1023 -- confirm against whatever generated the table.
DEF_OBJECT(approx_tab)
	.short	1023,1020,1016,1012,1008,1004,1000,996
	.short	992,989,985,981,978,974,970,967
	.short	963,960,956,953,949,946,942,939
	.short	936,932,929,926,923,919,916,913
	.short	910,907,903,900,897,894,891,888
	.short	885,882,879,876,873,870,868,865
	.short	862,859,856,853,851,848,845,842
	.short	840,837,834,832,829,826,824,821
	.short	819,816,814,811,809,806,804,801
	.short	799,796,794,791,789,787,784,782
	.short	780,777,775,773,771,768,766,764
	.short	762,759,757,755,753,751,748,746
	.short	744,742,740,738,736,734,732,730
	.short	728,726,724,722,720,718,716,714
	.short	712,710,708,706,704,702,700,699
	.short	697,695,693,691,689,688,686,684
	.short	682,680,679,677,675,673,672,670
	.short	668,667,665,663,661,660,658,657
	.short	655,653,652,650,648,647,645,644
	.short	642,640,639,637,636,634,633,631
	.short	630,628,627,625,624,622,621,619
	.short	618,616,615,613,612,611,609,608
	.short	606,605,604,602,601,599,598,597
	.short	595,594,593,591,590,589,587,586
	.short	585,583,582,581,579,578,577,576
	.short	574,573,572,571,569,568,567,566
	.short	564,563,562,561,560,558,557,556
	.short	555,554,553,551,550,549,548,547
	.short	546,544,543,542,541,540,539,538
	.short	537,536,534,533,532,531,530,529
	.short	528,527,526,525,524,523,522,521
	.short	520,519,518,517,516,515,514,513
END_OBJECT(approx_tab)
ASM_END()