summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTorbjorn Granlund <tege@gmplib.org>2010-12-19 15:56:09 +0100
committerTorbjorn Granlund <tege@gmplib.org>2010-12-19 15:56:09 +0100
commit14366960d27e4942374ce82296bd4d2192580066 (patch)
tree245854e94ffe2fd211faefed7b041170fd2cec4e
parentda16f25e67722a20c21cfd8132e398780264953c (diff)
downloadgmp-14366960d27e4942374ce82296bd4d2192580066.tar.gz
Canonicalise cmov forms.
-rw-r--r--ChangeLog10
-rw-r--r--mpn/x86/k7/mod_1_1.asm6
-rw-r--r--mpn/x86/k7/mod_1_4.asm12
-rw-r--r--mpn/x86/pentium4/sse2/mod_1_1.asm2
-rw-r--r--mpn/x86/pentium4/sse2/mod_1_4.asm8
-rw-r--r--mpn/x86_64/core2/divrem_1.asm4
-rw-r--r--mpn/x86_64/divrem_1.asm10
-rw-r--r--mpn/x86_64/mod_1_1.asm6
-rw-r--r--mpn/x86_64/mod_1_2.asm8
-rw-r--r--mpn/x86_64/mod_1_4.asm12
10 files changed, 44 insertions, 34 deletions
diff --git a/ChangeLog b/ChangeLog
index ef00e01bf..6acb1e291 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2010-12-19 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/x86/k7/mod_1_1.asm: Canonicalise cmov forms.
+ * mpn/x86/k7/mod_1_4.asm: Likewise.
+ * mpn/x86/pentium4/sse2/mod_1_1.asm: Likewise.
+ * mpn/x86/pentium4/sse2/mod_1_4.asm: Likewise.
+ * mpn/x86_64/core2/divrem_1.asm: Likewise.
+ * mpn/x86_64/divrem_1.asm: Likewise.
+ * mpn/x86_64/mod_1_1.asm: Likewise.
+ * mpn/x86_64/mod_1_2.asm: Likewise.
+ * mpn/x86_64/mod_1_4.asm: Likewise.
+
* mpn/x86/k7/gcd_1.asm: Rewrite. Remove slow 'div' loop. Call
mpn_mod_1 for operands with more than BMOD_1_TO_MOD_1_THRESHOLD limbs.
Misc cleanups.
diff --git a/mpn/x86/k7/mod_1_1.asm b/mpn/x86/k7/mod_1_1.asm
index f7a6706bd..648bcf939 100644
--- a/mpn/x86/k7/mod_1_1.asm
+++ b/mpn/x86/k7/mod_1_1.asm
@@ -101,10 +101,10 @@ L(nrm): lea 1(%eax), %esi
sub %edx, %eax
lea (%eax,%ebp), %edx
cmp %eax, %ebx
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
mov %eax, %edx
sub %ebp, %eax
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
pop %ebx
pop %esi
pop %edi
@@ -140,7 +140,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_1.asm
imul %ebp, %edx
add %edx, %ebp
cmp %edx, %eax
- cmovb( %ebp, %edx)
+ cmovc( %ebp, %edx)
shr %cl, %ebx
mov %ebx, 8(%esi) C store B1modb
shr %cl, %edx
diff --git a/mpn/x86/k7/mod_1_4.asm b/mpn/x86/k7/mod_1_4.asm
index 4ff450112..831482f42 100644
--- a/mpn/x86/k7/mod_1_4.asm
+++ b/mpn/x86/k7/mod_1_4.asm
@@ -153,10 +153,10 @@ L(end): mov 4(%esp), %eax
sub %edx, %eax
lea (%eax,%ebp), %edx
cmp %eax, %ebx
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
mov %eax, %edx
sub %ebp, %eax
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
add $28, %esp
pop %ebx
pop %esi
@@ -200,7 +200,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -212,7 +212,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -224,7 +224,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -236,7 +236,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
add %edx, %ebx
cmp %edx, %eax
- cmovae( %edx, %ebx)
+ cmovnc( %edx, %ebx)
shr %cl, %ebx
mov %ebx, 24(%ebp) C store B5modb
diff --git a/mpn/x86/pentium4/sse2/mod_1_1.asm b/mpn/x86/pentium4/sse2/mod_1_1.asm
index bd41598aa..1c1a64687 100644
--- a/mpn/x86/pentium4/sse2/mod_1_1.asm
+++ b/mpn/x86/pentium4/sse2/mod_1_1.asm
@@ -143,7 +143,7 @@ C CAUTION: This is the same code as in k7/mod_1_1.asm
imul %ebp, %edx
add %edx, %ebp
cmp %edx, %eax
- cmovb( %ebp, %edx)
+ cmovc( %ebp, %edx)
shr %cl, %ebx
mov %ebx, 8(%esi) C store B1modb
shr %cl, %edx
diff --git a/mpn/x86/pentium4/sse2/mod_1_4.asm b/mpn/x86/pentium4/sse2/mod_1_4.asm
index b62c7cc3f..edf0affd4 100644
--- a/mpn/x86/pentium4/sse2/mod_1_4.asm
+++ b/mpn/x86/pentium4/sse2/mod_1_4.asm
@@ -209,7 +209,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -221,7 +221,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -233,7 +233,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -245,7 +245,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
add %edx, %ebx
cmp %edx, %eax
- cmovae( %edx, %ebx)
+ cmovnc( %edx, %ebx)
shr %cl, %ebx
mov %ebx, 24(%ebp) C store B5modb
diff --git a/mpn/x86_64/core2/divrem_1.asm b/mpn/x86_64/core2/divrem_1.asm
index 9223836f0..05822fdda 100644
--- a/mpn/x86_64/core2/divrem_1.asm
+++ b/mpn/x86_64/core2/divrem_1.asm
@@ -169,7 +169,7 @@ L(end): lea 1(%rax), %r11
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
jae L(efx)
@@ -197,7 +197,7 @@ L(ftop):mul dinv C 0,12 0,17 0,17
mov d, %rax C
add %rdx, %rax C 10 14 14
cmp %r11, %rdx C 10 14 14
- cmovb %rdx, %rax C 11 15 15
+ cmovc %rdx, %rax C 11 15 15
adc $-1, %r13 C
mov %r13, (qp) C
sub $8, qp C
diff --git a/mpn/x86_64/divrem_1.asm b/mpn/x86_64/divrem_1.asm
index acfa9e91f..d55501944 100644
--- a/mpn/x86_64/divrem_1.asm
+++ b/mpn/x86_64/divrem_1.asm
@@ -109,7 +109,7 @@ L(normalized):
dec un
mov %rbp, %rax
sub d, %rbp
- cmovb %rax, %rbp
+ cmovc %rax, %rbp
sbb R32(%rax), R32(%rax)
inc R32(%rax)
mov %rax, (qp)
@@ -141,7 +141,7 @@ L(ntop): C K8-K10 P6-CNR P6-NHM P4
mov d, %rax C
add %r10, %rax C 11 17 15 34
cmp %rbp, %r10 C 11 17 15 34
- cmovb %r10, %rax C 12 18 16 35
+ cmovc %r10, %rax C 12 18 16 35
adc $-1, %r13 C
cmp d, %rax C
jae L(nfx) C
@@ -216,7 +216,7 @@ L(utop):mov (up,un,8), %r10
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
jae L(ufx)
@@ -238,7 +238,7 @@ L(uend):shl R8(%rcx), %rbp
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
jae L(efx)
@@ -266,7 +266,7 @@ L(ftop):mul dinv C 0,12 0,17 0,17
mov d, %rax C
add %rdx, %rax C 10 14 14
cmp %r11, %rdx C 10 14 14
- cmovb %rdx, %rax C 11 15 15
+ cmovc %rdx, %rax C 11 15 15
adc $-1, %r13 C
mov %r13, (qp) C
sub $8, qp C
diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm
index 82592dc0f..fa50cece4 100644
--- a/mpn/x86_64/mod_1_1.asm
+++ b/mpn/x86_64/mod_1_1.asm
@@ -116,11 +116,11 @@ L(4):
mov %rsi, %rax
lea (%rsi,%rbp), %rdx
cmp %rsi, %rbx
- cmovb %rdx, %rax
+ cmovc %rdx, %rax
mov %rax, %rdx
sub %rbp, %rdx
cmp %rbp, %rax
- cmovae %rdx, %rax
+ cmovnc %rdx, %rax
mov R32(%rdi), R32(%rcx)
shr R8(%rcx), %rax
pop %rbx
@@ -166,7 +166,7 @@ L(z): mul %r8
imul %r12, %rdx
add %rdx, %r12
cmp %rdx, %rax
- cmovb %r12, %rdx
+ cmovc %r12, %rdx
shr R8(%rcx), %r8
shr R8(%rcx), %rdx
mov %r8, 16(%rbx) C store B1modb
diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm
index c7102e877..4533cbe45 100644
--- a/mpn/x86_64/mod_1_2.asm
+++ b/mpn/x86_64/mod_1_2.asm
@@ -133,10 +133,10 @@ L(1): xor R32(%rcx), R32(%rcx)
sub %rdx, %r8
lea (%r8,%r14), %rax
cmp %r8, %rsi
- cmovb %rax, %r8
+ cmovc %rax, %r8
mov %r8, %rax
sub %r14, %rax
- cmovb %r8, %rax
+ cmovc %r8, %rax
mov R32(%rdi), R32(%rcx)
shr R8(%rcx), %rax
pop %rbx
@@ -193,7 +193,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -205,7 +205,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
add %rdx, %r12
cmp %rdx, %rax
- cmovae %rdx, %r12
+ cmovnc %rdx, %r12
shr R8(%rcx), %r12
mov %r12, 32(%rbx) C store B3modb
diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm
index 2d03759b1..d99080d7f 100644
--- a/mpn/x86_64/mod_1_4.asm
+++ b/mpn/x86_64/mod_1_4.asm
@@ -145,10 +145,10 @@ L(end): mov 8(%r14), R32(%rsi)
sub %rdx, %r8
lea (%r8,%rbx), %rax
cmp %r8, %r9
- cmovb %rax, %r8
+ cmovc %rax, %r8
mov %r8, %rax
sub %rbx, %rax
- cmovb %r8, %rax
+ cmovc %r8, %rax
shr R8(%rcx), %rax
pop %rbx
pop %rbp
@@ -200,7 +200,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -212,7 +212,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -224,7 +224,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -236,7 +236,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
add %rdx, %r12
cmp %rdx, %rax
- cmovae %rdx, %r12
+ cmovnc %rdx, %r12
shr R8(%rcx), %r12
mov %r12, 48(%rbx) C store B5modb