summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- mpn/x86_64/addaddmul_1msb0.asm 10
-rw-r--r-- mpn/x86_64/addmul_2.asm 12
-rw-r--r-- mpn/x86_64/aorrlsh1_n.asm 13
-rw-r--r-- mpn/x86_64/aorrlsh2_n.asm 13
-rw-r--r-- mpn/x86_64/aorrlsh_n.asm 11
-rw-r--r-- mpn/x86_64/aors_n.asm 13
-rw-r--r-- mpn/x86_64/aorsmul_1.asm 13
-rw-r--r-- mpn/x86_64/atom/aors_n.asm 12
-rw-r--r-- mpn/x86_64/bdiv_dbm1c.asm 13
-rw-r--r-- mpn/x86_64/bdiv_q_1.asm 13
-rw-r--r-- mpn/x86_64/com.asm 11
-rw-r--r-- mpn/x86_64/copyd.asm 12
-rw-r--r-- mpn/x86_64/copyi.asm 11
-rw-r--r-- mpn/x86_64/core2/aors_n.asm 12
-rw-r--r-- mpn/x86_64/core2/aorslsh1_n.asm 11
-rw-r--r-- mpn/x86_64/core2/aorsmul_1.asm 12
-rw-r--r-- mpn/x86_64/core2/lshift.asm 12
-rw-r--r-- mpn/x86_64/core2/lshiftc.asm 12
-rw-r--r-- mpn/x86_64/core2/rshift.asm 12
-rw-r--r-- mpn/x86_64/dive_1.asm 13
-rw-r--r-- mpn/x86_64/divrem_2.asm 11
-rw-r--r-- mpn/x86_64/invert_limb.asm 13
-rw-r--r-- mpn/x86_64/logops_n.asm 11
-rw-r--r-- mpn/x86_64/lshift.asm 12
-rw-r--r-- mpn/x86_64/lshiftc.asm 12
-rw-r--r-- mpn/x86_64/lshsub_n.asm 11
-rw-r--r-- mpn/x86_64/mod_1_1.asm 12
-rw-r--r-- mpn/x86_64/mod_1_2.asm 12
-rw-r--r-- mpn/x86_64/mod_1_4.asm 12
-rw-r--r-- mpn/x86_64/mod_34lsub1.asm 12
-rw-r--r-- mpn/x86_64/mode1o.asm 13
-rw-r--r-- mpn/x86_64/mul_1.asm 13
-rw-r--r-- mpn/x86_64/mul_2.asm 12
-rw-r--r-- mpn/x86_64/mul_basecase.asm 11
-rw-r--r-- mpn/x86_64/pentium4/aors_n.asm 11
-rw-r--r-- mpn/x86_64/pentium4/aorslsh1_n.asm 11
-rw-r--r-- mpn/x86_64/pentium4/lshift.asm 12
-rw-r--r-- mpn/x86_64/pentium4/rshift.asm 12
-rw-r--r-- mpn/x86_64/popham.asm 11
-rw-r--r-- mpn/x86_64/redc_1.asm 12
-rw-r--r-- mpn/x86_64/rsh1aors_n.asm 11
-rw-r--r-- mpn/x86_64/rshift.asm 12
-rw-r--r-- mpn/x86_64/sublsh1_n.asm 14
43 files changed, 298 insertions, 216 deletions
diff --git a/mpn/x86_64/addaddmul_1msb0.asm b/mpn/x86_64/addaddmul_1msb0.asm
index 89e7bed98..5b2b66847 100644
--- a/mpn/x86_64/addaddmul_1msb0.asm
+++ b/mpn/x86_64/addaddmul_1msb0.asm
@@ -20,9 +20,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8: 2.167
-C P4: 12.0
-C P6-15: 4.0
+C AMD K8,K9 2.167
+C AMD K10 2.167
+C Intel P4 12.0
+C Intel core2 4.0
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C TODO
C * Perhaps handle various n mod 3 sizes better. The code now is too large.
diff --git a/mpn/x86_64/addmul_2.asm b/mpn/x86_64/addmul_2.asm
index e762113f6..a2892f858 100644
--- a/mpn/x86_64/addmul_2.asm
+++ b/mpn/x86_64/addmul_2.asm
@@ -21,11 +21,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.375
-C K10: 2.375
-C P4: ?
-C P6 core2: 4.45
-C P6 corei7: 4.35
+C AMD K8,K9 2.375
+C AMD K10 2.375
+C Intel P4 ?
+C Intel core2 4.45
+C Intel corei 4.35
+C Intel atom ?
+C VIA nano 4.5
C This code is the result of running a code generation and optimization tool
C suite written by David Harvey and Torbjorn Granlund.
diff --git a/mpn/x86_64/aorrlsh1_n.asm b/mpn/x86_64/aorrlsh1_n.asm
index 75fd009c6..3f53e6935 100644
--- a/mpn/x86_64/aorrlsh1_n.asm
+++ b/mpn/x86_64/aorrlsh1_n.asm
@@ -22,12 +22,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2
-C K10: 2
-C P4: 13
-C P6 core2: 3.45
-C P6 corei7: 3.45
-C P6 atom: ?
+C AMD K8,K9 2
+C AMD K10 2
+C Intel P4 13
+C Intel core2 3.45
+C Intel corei 3.45
+C Intel atom ?
+C VIA nano ?
C Sometimes speed degenerates, supposedly related to that some operand
diff --git a/mpn/x86_64/aorrlsh2_n.asm b/mpn/x86_64/aorrlsh2_n.asm
index 16cecef8d..11e586453 100644
--- a/mpn/x86_64/aorrlsh2_n.asm
+++ b/mpn/x86_64/aorrlsh2_n.asm
@@ -23,12 +23,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2
-C K10: 2
-C P4: ?
-C P6 core2: 3
-C P6 corei7: 2.75
-C P6 atom: ?
+C AMD K8,K9 2
+C AMD K10 2
+C Intel P4 ?
+C Intel core2 3
+C Intel corei 2.75
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
diff --git a/mpn/x86_64/aorrlsh_n.asm b/mpn/x86_64/aorrlsh_n.asm
index 9aa8af97b..f5203767b 100644
--- a/mpn/x86_64/aorrlsh_n.asm
+++ b/mpn/x86_64/aorrlsh_n.asm
@@ -23,10 +23,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
-C K10: 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
-C P4: 14
-C P6-15: 4
+C AMD K8,K9 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
+C AMD K10 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
+C Intel P4 14
+C Intel core2 4
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C This was written quickly and not optimized at all. Surely one could get
C closer to 3 c/l or perhaps even under 3 c/l. Ideas:
diff --git a/mpn/x86_64/aors_n.asm b/mpn/x86_64/aors_n.asm
index f2365ab45..be649868f 100644
--- a/mpn/x86_64/aors_n.asm
+++ b/mpn/x86_64/aors_n.asm
@@ -20,12 +20,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 1.5
-C K10: 1.5
-C P4: ?
-C P6 core2: 4.9
-C P6 corei7:
-C P6 atom: 4
+C AMD K8,K9 1.5
+C AMD K10 1.5
+C Intel P4 ?
+C Intel core2 4.9
+C Intel corei ?
+C Intel atom 4
+C VIA nano 3.25
C The inner loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
diff --git a/mpn/x86_64/aorsmul_1.asm b/mpn/x86_64/aorsmul_1.asm
index cbf4b4693..3bdba0a06 100644
--- a/mpn/x86_64/aorsmul_1.asm
+++ b/mpn/x86_64/aorsmul_1.asm
@@ -20,12 +20,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.5
-C K10: 2.5
-C P4: 14.9
-C P6 core2: 5.09
-C P6 corei7:
-C P6 atom: 21.3
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C Intel P4 14.9
+C Intel core2 5.09
+C Intel corei ?
+C Intel atom 21.3
+C VIA nano 5.5
C The inner loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
diff --git a/mpn/x86_64/atom/aors_n.asm b/mpn/x86_64/atom/aors_n.asm
index 32c19424f..72e231b17 100644
--- a/mpn/x86_64/atom/aors_n.asm
+++ b/mpn/x86_64/atom/aors_n.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 1.85
-C K10: ?
-C P4: ?
-C P6-15 (Core2): ?
-C P6-28 (Atom): 3
+C AMD K8,K9 1.85
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel corei ?
+C Intel atom 3
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
diff --git a/mpn/x86_64/bdiv_dbm1c.asm b/mpn/x86_64/bdiv_dbm1c.asm
index d6775aef9..72173ce85 100644
--- a/mpn/x86_64/bdiv_dbm1c.asm
+++ b/mpn/x86_64/bdiv_dbm1c.asm
@@ -20,12 +20,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.25
-C K10: ?
-C P4: 12.5
-C P6 core2: 4.0
-C P6 corei7: 3.8
-C P6 atom: 20
+C AMD K8,K9 2.25
+C AMD K10 ?
+C Intel P4 12.5
+C Intel core2 4.0
+C Intel corei 3.8
+C Intel atom 20
+C VIA nano 4.2
C TODO
C * Do proper 4-way feed-in instead of the current epilogue
diff --git a/mpn/x86_64/bdiv_q_1.asm b/mpn/x86_64/bdiv_q_1.asm
index 2356f2bc0..01624a52a 100644
--- a/mpn/x86_64/bdiv_q_1.asm
+++ b/mpn/x86_64/bdiv_q_1.asm
@@ -23,12 +23,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 10
-C K10: 10
-C P4: 33
-C P6 core2: 13.25
-C P6 corei7: 14
-C P6 atom: 42
+C AMD K8,K9 10
+C AMD K10 10
+C Intel P4 33
+C Intel core2 13.25
+C Intel corei 14
+C Intel atom 42
+C VIA nano ?
C INPUT PARAMETERS
diff --git a/mpn/x86_64/com.asm b/mpn/x86_64/com.asm
index 699da11b6..db968e96a 100644
--- a/mpn/x86_64/com.asm
+++ b/mpn/x86_64/com.asm
@@ -21,10 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 1.25
-C K10: 1.25
-C P4: 2.78
-C P6-15: 1.1
+C AMD K8,K9 1.25
+C AMD K10 1.25
+C Intel P4 2.78
+C Intel core2 1.1
+C Intel corei 1.5
+C Intel atom ?
+C VIA nano 2
C INPUT PARAMETERS
define(`rp',`%rdi')
diff --git a/mpn/x86_64/copyd.asm b/mpn/x86_64/copyd.asm
index f5c451cfe..89b26aa16 100644
--- a/mpn/x86_64/copyd.asm
+++ b/mpn/x86_64/copyd.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 1
-C K10: 1
-C P4: 2.8
-C P6 core2: 1.2
-C P6 corei7: 1
+C AMD K8,K9 1
+C AMD K10 1
+C Intel P4 2.8
+C Intel core2 1.2
+C Intel corei ?
+C Intel atom ?
+C VIA nano 2
C INPUT PARAMETERS
diff --git a/mpn/x86_64/copyi.asm b/mpn/x86_64/copyi.asm
index 506142be7..c7c46c02d 100644
--- a/mpn/x86_64/copyi.asm
+++ b/mpn/x86_64/copyi.asm
@@ -21,10 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 1
-C K10: 1
-C P4: 2.8
-C P6-15: 1.2
+C AMD K8,K9 1
+C AMD K10 1
+C Intel P4 2.8
+C Intel core2 1.2
+C Intel corei ?
+C Intel atom ?
+C VIA nano 2
C INPUT PARAMETERS
diff --git a/mpn/x86_64/core2/aors_n.asm b/mpn/x86_64/core2/aors_n.asm
index 3dc04d0b7..8a20eba65 100644
--- a/mpn/x86_64/core2/aors_n.asm
+++ b/mpn/x86_64/core2/aors_n.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.25
-C K10: 2
-C P4: 10
-C P6 core2: 2.05
-C P6 corei7: 2.3
+C AMD K8,K9 2.25
+C AMD K10 2
+C Intel P4 10
+C Intel core2 2.05
+C Intel corei 2.3
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
diff --git a/mpn/x86_64/core2/aorslsh1_n.asm b/mpn/x86_64/core2/aorslsh1_n.asm
index 18db7c96f..b8c752b3a 100644
--- a/mpn/x86_64/core2/aorslsh1_n.asm
+++ b/mpn/x86_64/core2/aorslsh1_n.asm
@@ -20,10 +20,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 4.25
-C K10: ?
-C P4: ?
-C P6-15: 3
+C AMD K8,K9 4.25
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 3
+C Intel corei 3
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp',`%rdi')
diff --git a/mpn/x86_64/core2/aorsmul_1.asm b/mpn/x86_64/core2/aorsmul_1.asm
index 8dcccd994..bb4f663c4 100644
--- a/mpn/x86_64/core2/aorsmul_1.asm
+++ b/mpn/x86_64/core2/aorsmul_1.asm
@@ -20,11 +20,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 4
-C K10: 4
-C P4: ?
-C P6 core2: 4.3-4.5 (fluctuating)
-C P6 corei7: 5
+C AMD K8,K9 4
+C AMD K10 4
+C Intel P4 ?
+C Intel core2 4.3-4.5 (fluctuating)
+C Intel corei 5
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
diff --git a/mpn/x86_64/core2/lshift.asm b/mpn/x86_64/core2/lshift.asm
index e3e400874..a15868b03 100644
--- a/mpn/x86_64/core2/lshift.asm
+++ b/mpn/x86_64/core2/lshift.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 4.25
-C K10: 4.25
-C P4: 14.7
-C P6 core2: 1.27
-C P6 corei7: 1.5
+C AMD K8,K9 4.25
+C AMD K10 4.25
+C Intel P4 14.7
+C Intel core2 1.27
+C Intel corei 1.5
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
diff --git a/mpn/x86_64/core2/lshiftc.asm b/mpn/x86_64/core2/lshiftc.asm
index bc014c855..a2d905dc8 100644
--- a/mpn/x86_64/core2/lshiftc.asm
+++ b/mpn/x86_64/core2/lshiftc.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: ?
-C K10: ?
-C P4: ?
-C P6 core2: 1.5
-C P6 corei7: 1.75
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 1.5
+C Intel corei 1.75
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
diff --git a/mpn/x86_64/core2/rshift.asm b/mpn/x86_64/core2/rshift.asm
index 485fd4b0d..bc6224064 100644
--- a/mpn/x86_64/core2/rshift.asm
+++ b/mpn/x86_64/core2/rshift.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 4.25
-C K10: 4.25
-C P4: 14.7
-C P6 core2: 1.27
-C P6 corei7: 1.5
+C AMD K8,K9 4.25
+C AMD K10 4.25
+C Intel P4 14.7
+C Intel core2 1.27
+C Intel corei 1.5
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
diff --git a/mpn/x86_64/dive_1.asm b/mpn/x86_64/dive_1.asm
index f3b6ac8fa..eb955c1ec 100644
--- a/mpn/x86_64/dive_1.asm
+++ b/mpn/x86_64/dive_1.asm
@@ -21,12 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 10
-C K10: 10
-C P4: 33
-C P6 core2: 13.25
-C P6 corei7: 14
-C P6 atom: 42
+C AMD K8,K9 10
+C AMD K10 10
+C Intel P4 33
+C Intel core2 13.25
+C Intel corei 14
+C Intel atom 42
+C VIA nano ?
C A quick adoption of the 32-bit K7 code.
diff --git a/mpn/x86_64/divrem_2.asm b/mpn/x86_64/divrem_2.asm
index 88ffe9c74..6f5084f9b 100644
--- a/mpn/x86_64/divrem_2.asm
+++ b/mpn/x86_64/divrem_2.asm
@@ -21,10 +21,13 @@ include(`../config.m4')
C norm frac
-C K8 20 20
-C P4 73 73
-C P6 core2 37 37
-C P6 corei7 33 33
+C AMD K8,K9 20 20
+C AMD K10 20 20
+C Intel P4 73 73
+C Intel core2 37 37
+C Intel corei 33 33
+C Intel atom ? ?
+C VIA nano ? ?
C TODO
C * Perhaps compute the inverse without relying on divq? Could either use
diff --git a/mpn/x86_64/invert_limb.asm b/mpn/x86_64/invert_limb.asm
index 8dcfae0b0..471ce685d 100644
--- a/mpn/x86_64/invert_limb.asm
+++ b/mpn/x86_64/invert_limb.asm
@@ -23,12 +23,13 @@ include(`../config.m4')
C cycles/limb (approx) div
-C K8,K9: 48 71
-C K10: 48 77
-C P4: 135 161
-C P6 core2: 69 116
-C P6 corei7: 55 89
-C P6 atom: 129 191
+C AMD K8,K9 48 71
+C AMD K10 48 77
+C Intel P4 135 161
+C Intel core2 69 116
+C Intel corei 55 89
+C Intel atom 129 191
+C VIA nano 79 ?
C rax rcx rdx rdi rsi r8
diff --git a/mpn/x86_64/logops_n.asm b/mpn/x86_64/logops_n.asm
index 1022b6137..63f82fd93 100644
--- a/mpn/x86_64/logops_n.asm
+++ b/mpn/x86_64/logops_n.asm
@@ -21,10 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 1.5
-C K10: 1.75-2 (fluctuating)
-C P4: 2.8/3.35/3.60 (variant1/variant2/variant3)
-C P6-15: 2.0
+C AMD K8,K9 1.5
+C AMD K10 1.75-2 (fluctuating)
+C Intel P4 2.8/3.35/3.60 (variant1/variant2/variant3)
+C Intel core2 2
+C Intel corei 2
+C Intel atom ?
+C VIA nano 3.25
ifdef(`OPERATION_and_n',`
define(`func',`mpn_and_n')
diff --git a/mpn/x86_64/lshift.asm b/mpn/x86_64/lshift.asm
index d59d8250a..2f3d5c94d 100644
--- a/mpn/x86_64/lshift.asm
+++ b/mpn/x86_64/lshift.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb cycles/limb cnt=1
-C K8,K9: 2.375 1.375
-C K10: 2.375 1.375
-C P4: 8 10.5
-C P6-15 (Core2): 2.11 4.28
-C P6-28 (Atom): 5.75 3.5
+C AMD K8,K9 2.375 1.375
+C AMD K10 2.375 1.375
+C Intel P4 8 10.5
+C Intel core2 2.11 4.28
+C Intel corei ? ?
+C Intel atom 5.75 3.5
+C VIA nano 3.5 2.25
C INPUT PARAMETERS
diff --git a/mpn/x86_64/lshiftc.asm b/mpn/x86_64/lshiftc.asm
index 2423529c4..a6b948196 100644
--- a/mpn/x86_64/lshiftc.asm
+++ b/mpn/x86_64/lshiftc.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.75
-C K10: 2.75
-C P4: ?
-C P6-15 (Core2): ?
-C P6-28 (Atom): ?
+C AMD K8,K9 2.75
+C AMD K10 2.75
+C Intel P4 ?
+C Intel core2 ?
+C Intel corei ?
+C Intel atom ?
+C VIA nano 3.75
C INPUT PARAMETERS
diff --git a/mpn/x86_64/lshsub_n.asm b/mpn/x86_64/lshsub_n.asm
index 6ae7c3640..e00038dea 100644
--- a/mpn/x86_64/lshsub_n.asm
+++ b/mpn/x86_64/lshsub_n.asm
@@ -21,10 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
-C K10: 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
-C P4: 16.5
-C P6-15: 4.35
+C AMD K8,K9 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C AMD K10 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C Intel P4 16.5
+C Intel core2 4.35
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C This was written quickly and not optimized at all, but it runs very well on
C K8. But perhaps one could get under 3 c/l. Ideas:
diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm
index fac7b412c..27f1e16c9 100644
--- a/mpn/x86_64/mod_1_1.asm
+++ b/mpn/x86_64/mod_1_1.asm
@@ -22,12 +22,12 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 7
-C K10: 7
-C P4: 27
-C P6 core2: 14
-C P6 corei: 12.5
-C P6 atom: 37
+C AMD K8,K9 7
+C AMD K10 7
+C Intel P4 27
+C Intel core2 14
+C Intel corei 12.5
+C Intel atom 37
C VIA nano 15
ASM_START()
diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm
index 77a05e0bd..41ac55a1a 100644
--- a/mpn/x86_64/mod_1_2.asm
+++ b/mpn/x86_64/mod_1_2.asm
@@ -22,12 +22,12 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 4
-C K10: 4
-C P4: 19
-C P6 core2: 8
-C P6 corei: 6.5
-C P6 atom: 28
+C AMD K8,K9 4
+C AMD K10 4
+C Intel P4 19
+C Intel core2 8
+C Intel corei 6.5
+C Intel atom 28
C VIA nano 8
ASM_START()
diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm
index 417991a6b..d2eb06837 100644
--- a/mpn/x86_64/mod_1_4.asm
+++ b/mpn/x86_64/mod_1_4.asm
@@ -22,12 +22,12 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 3
-C K10: 3
-C P4: 15.5
-C P6 core2: 5
-C P6 corei: 4.25
-C P6 atom: 23
+C AMD K8,K9 3
+C AMD K10 3
+C Intel P4 15.5
+C Intel core2 5
+C Intel corei 4.25
+C Intel atom 23
C VIA nano 5
ASM_START()
diff --git a/mpn/x86_64/mod_34lsub1.asm b/mpn/x86_64/mod_34lsub1.asm
index 318fb96d6..59a960863 100644
--- a/mpn/x86_64/mod_34lsub1.asm
+++ b/mpn/x86_64/mod_34lsub1.asm
@@ -22,11 +22,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 1.0
-C K10: 1.12
-C P4: 3.25
-C P6-15 (Core2): 1.5
-C P6-28 (Atom): 2.5
+C AMD K8,K9 1.0
+C AMD K10 1.12
+C Intel P4 3.25
+C Intel core2 1.5
+C Intel corei 1.5
+C Intel atom 2.5
+C VIA nano 1.75
C INPUT PARAMETERS
diff --git a/mpn/x86_64/mode1o.asm b/mpn/x86_64/mode1o.asm
index ae5f83c29..d2e82b094 100644
--- a/mpn/x86_64/mode1o.asm
+++ b/mpn/x86_64/mode1o.asm
@@ -22,12 +22,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 10
-C K10: 10
-C P4: 33
-C P6 core2: 13
-C P6 corei7: 14.5
-C P6 Atom: 35
+C AMD K8,K9 10
+C AMD K10 10
+C Intel P4 33
+C Intel core2 13
+C Intel corei 14.5
+C Intel atom 35
+C VIA nano ?
C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86_64/mul_1.asm b/mpn/x86_64/mul_1.asm
index a0c45990e..29340c390 100644
--- a/mpn/x86_64/mul_1.asm
+++ b/mpn/x86_64/mul_1.asm
@@ -20,12 +20,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.5
-C K10: 2.5
-C P4: 12.3
-C P6 core2: 4.0
-C P6 corei7: 3.8
-C Atom: 19.8
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C Intel P4 12.3
+C Intel core2 4.0
+C Intel corei 3.8
+C Intel atom 19.8
+C VIA nano ?
C The inner loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
diff --git a/mpn/x86_64/mul_2.asm b/mpn/x86_64/mul_2.asm
index ab87aaf21..7f8d0a167 100644
--- a/mpn/x86_64/mul_2.asm
+++ b/mpn/x86_64/mul_2.asm
@@ -21,11 +21,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.275
-C K10: 2.275
-C P4: ?
-C P6 core2: 4.0
-C P6 corei7: 3.8
+C AMD K8,K9 2.275
+C AMD K10 2.275
+C Intel P4 ?
+C Intel core2 4.0
+C Intel corei 3.8
+C Intel atom ?
+C VIA nano ?
C This code is the result of running a code generation and optimization tool
C suite written by David Harvey and Torbjorn Granlund.
diff --git a/mpn/x86_64/mul_basecase.asm b/mpn/x86_64/mul_basecase.asm
index 532076629..bc0ea1933 100644
--- a/mpn/x86_64/mul_basecase.asm
+++ b/mpn/x86_64/mul_basecase.asm
@@ -22,10 +22,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.375
-C K10: 2.375
-C P4: ?
-C P6-15: 4.45
+C AMD K8,K9 2.375
+C AMD K10 2.375
+C Intel P4 ?
+C Intel core2 4.45
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C The inner loops of this code are the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
diff --git a/mpn/x86_64/pentium4/aors_n.asm b/mpn/x86_64/pentium4/aors_n.asm
index 90f5a219b..bc49aa90b 100644
--- a/mpn/x86_64/pentium4/aors_n.asm
+++ b/mpn/x86_64/pentium4/aors_n.asm
@@ -21,10 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.8
-C K10: 2.8
-C P4: 4
-C P6-15: 3.6-5 (fluctuating)
+C AMD K8,K9 2.8
+C AMD K10 2.8
+C Intel P4 4
+C Intel core2 3.6-5 (fluctuating)
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
diff --git a/mpn/x86_64/pentium4/aorslsh1_n.asm b/mpn/x86_64/pentium4/aorslsh1_n.asm
index 0723f3e6c..cd5452768 100644
--- a/mpn/x86_64/pentium4/aorslsh1_n.asm
+++ b/mpn/x86_64/pentium4/aorslsh1_n.asm
@@ -21,10 +21,13 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C K8,K9: 3.8
-C K10: 4.8
-C P4: 5.8
-C P6-15: ?
+C AMD K8,K9 3.8
+C AMD K10 4.8
+C Intel P4 5.8
+C Intel core2 ?
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
diff --git a/mpn/x86_64/pentium4/lshift.asm b/mpn/x86_64/pentium4/lshift.asm
index 7596d9c5c..df52b95c6 100644
--- a/mpn/x86_64/pentium4/lshift.asm
+++ b/mpn/x86_64/pentium4/lshift.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.5
-C K10: ?
-C P4: 3.29
-C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
-C P6-28 (Atom): 14.3
+C AMD K8,K9 2.5
+C AMD K10 ?
+C Intel P4 3.29
+C Intel core2 2.1 (fluctuates, presumably cache related)
+C Intel corei ?
+C Intel atom 14.3
+C VIA nano ?
C INPUT PARAMETERS
define(`rp',`%rdi')
diff --git a/mpn/x86_64/pentium4/rshift.asm b/mpn/x86_64/pentium4/rshift.asm
index 61899c5ec..da0bb8b07 100644
--- a/mpn/x86_64/pentium4/rshift.asm
+++ b/mpn/x86_64/pentium4/rshift.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.5
-C K10: ?
-C P4: 3.29
-C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
-C P6-28 (Atom): 14.3
+C AMD K8,K9 2.5
+C AMD K10 ?
+C Intel P4 3.29
+C Intel core2 2.1 (fluctuates, presumably cache related)
+C Intel corei ?
+C Intel atom 14.3
+C VIA nano ?
C INPUT PARAMETERS
define(`rp',`%rdi')
diff --git a/mpn/x86_64/popham.asm b/mpn/x86_64/popham.asm
index e2bdb1a0b..bd335f63f 100644
--- a/mpn/x86_64/popham.asm
+++ b/mpn/x86_64/popham.asm
@@ -23,10 +23,13 @@ include(`../config.m4')
C popcount hamdist
C cycles/limb cycles/limb
-C K8,K9: 6 7
-C K10: 6 7
-C P4: 12 14.3
-C P6-15: 7 8
+C AMD K8,K9 6 7
+C AMD K10 6 7
+C Intel P4 12 14.3
+C Intel core2 7 8
+C Intel corei ? 7.3
+C Intel atom ? ?
+C VIA nano 9.25 11
C TODO
C * Tune. It should be possible to reach 5 c/l for popcount and 6 c/l for
diff --git a/mpn/x86_64/redc_1.asm b/mpn/x86_64/redc_1.asm
index ceaadea01..08dd5baa3 100644
--- a/mpn/x86_64/redc_1.asm
+++ b/mpn/x86_64/redc_1.asm
@@ -22,11 +22,13 @@ include(`../config.m4')
C cycles/limb
C cycles/limb
-C K8,K9: 2.5
-C K10: 2.5
-C P4: ?
-C P6-15 (Core2): 5.3
-C P6-28 (Atom): ?
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C Intel P4 ?
+C Intel core2 5.3
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C TODO
C * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code.
diff --git a/mpn/x86_64/rsh1aors_n.asm b/mpn/x86_64/rsh1aors_n.asm
index 41e67e371..1841d1a75 100644
--- a/mpn/x86_64/rsh1aors_n.asm
+++ b/mpn/x86_64/rsh1aors_n.asm
@@ -21,10 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.14 (mpn_add_n + mpn_rshift need 4.125)
-C K10: 2.14 (mpn_add_n + mpn_rshift need 4.125)
-C P4: 12.75
-C P6-15: 3.75
+C AMD K8,K9 2.14 (mpn_add_n + mpn_rshift need 4.125)
+C AMD K10 2.14 (mpn_add_n + mpn_rshift need 4.125)
+C Intel P4 12.75
+C Intel core2 3.75
+C Intel corei ?
+C Intel atom ?
+C VIA nano 3.25
C TODO
C * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
diff --git a/mpn/x86_64/rshift.asm b/mpn/x86_64/rshift.asm
index 3b1586828..0f822a4a0 100644
--- a/mpn/x86_64/rshift.asm
+++ b/mpn/x86_64/rshift.asm
@@ -21,11 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.375
-C K10: 2.375
-C P4: 8
-C P6-15 (Core2): 2.11
-C P6-28 (Atom): 5.75
+C AMD K8,K9 2.375
+C AMD K10 2.375
+C Intel P4 8
+C Intel core2 2.11
+C Intel corei ?
+C Intel atom 5.75
+C VIA nano 3.5
C INPUT PARAMETERS
diff --git a/mpn/x86_64/sublsh1_n.asm b/mpn/x86_64/sublsh1_n.asm
index 6f67fae36..a2f48c007 100644
--- a/mpn/x86_64/sublsh1_n.asm
+++ b/mpn/x86_64/sublsh1_n.asm
@@ -21,13 +21,13 @@ include(`../config.m4')
C cycles/limb
-C K8,K9: 2.2
-C K10: 2.2
-C P4: 12.75
-C P6 core2: 3.45
-C P6 corei7: 3.45
-C P6 atom: ?
-
+C AMD K8,K9 2.2
+C AMD K10 2.2
+C Intel P4 12.75
+C Intel core2 3.45
+C Intel corei ?
+C Intel atom ?
+C VIA nano 3.25
C Sometimes speed degenerates, supposedly related to that some operand
C alignments cause cache conflicts.