summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTorbjorn Granlund <tege@gmplib.org>2010-12-19 15:56:09 +0100
committerTorbjorn Granlund <tege@gmplib.org>2010-12-19 15:56:09 +0100
commit14366960d27e4942374ce82296bd4d2192580066 (patch)
tree245854e94ffe2fd211faefed7b041170fd2cec4e
parentda16f25e67722a20c21cfd8132e398780264953c (diff)
downloadgmp-14366960d27e4942374ce82296bd4d2192580066.tar.gz
Canonicalise cmov forms.
-rw-r--r--ChangeLog10
-rw-r--r--mpn/x86/k7/mod_1_1.asm6
-rw-r--r--mpn/x86/k7/mod_1_4.asm12
-rw-r--r--mpn/x86/pentium4/sse2/mod_1_1.asm2
-rw-r--r--mpn/x86/pentium4/sse2/mod_1_4.asm8
-rw-r--r--mpn/x86_64/core2/divrem_1.asm4
-rw-r--r--mpn/x86_64/divrem_1.asm10
-rw-r--r--mpn/x86_64/mod_1_1.asm6
-rw-r--r--mpn/x86_64/mod_1_2.asm8
-rw-r--r--mpn/x86_64/mod_1_4.asm12
10 files changed, 44 insertions, 34 deletions
diff --git a/ChangeLog b/ChangeLog
index ef00e01bf..6acb1e291 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2010-12-19 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/x86/k7/mod_1_1.asm: Canonicalise cmov forms.
+ * mpn/x86/k7/mod_1_4.asm: Likewise.
+ * mpn/x86/pentium4/sse2/mod_1_1.asm: Likewise.
+ * mpn/x86/pentium4/sse2/mod_1_4.asm: Likewise.
+ * mpn/x86_64/core2/divrem_1.asm: Likewise.
+ * mpn/x86_64/divrem_1.asm: Likewise.
+ * mpn/x86_64/mod_1_1.asm: Likewise.
+ * mpn/x86_64/mod_1_2.asm: Likewise.
+ * mpn/x86_64/mod_1_4.asm: Likewise.
+
* mpn/x86/k7/gcd_1.asm: Rewrite. Remove slow 'div' loop. Call
mpn_mod_1 for operands with more than BMOD_1_TO_MOD_1_THRESHOLD limbs.
Misc cleanups.
diff --git a/mpn/x86/k7/mod_1_1.asm b/mpn/x86/k7/mod_1_1.asm
index f7a6706bd..648bcf939 100644
--- a/mpn/x86/k7/mod_1_1.asm
+++ b/mpn/x86/k7/mod_1_1.asm
@@ -101,10 +101,10 @@ L(nrm): lea 1(%eax), %esi
sub %edx, %eax
lea (%eax,%ebp), %edx
cmp %eax, %ebx
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
mov %eax, %edx
sub %ebp, %eax
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
pop %ebx
pop %esi
pop %edi
@@ -140,7 +140,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_1.asm
imul %ebp, %edx
add %edx, %ebp
cmp %edx, %eax
- cmovb( %ebp, %edx)
+ cmovc( %ebp, %edx)
shr %cl, %ebx
mov %ebx, 8(%esi) C store B1modb
shr %cl, %edx
diff --git a/mpn/x86/k7/mod_1_4.asm b/mpn/x86/k7/mod_1_4.asm
index 4ff450112..831482f42 100644
--- a/mpn/x86/k7/mod_1_4.asm
+++ b/mpn/x86/k7/mod_1_4.asm
@@ -153,10 +153,10 @@ L(end): mov 4(%esp), %eax
sub %edx, %eax
lea (%eax,%ebp), %edx
cmp %eax, %ebx
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
mov %eax, %edx
sub %ebp, %eax
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
add $28, %esp
pop %ebx
pop %esi
@@ -200,7 +200,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -212,7 +212,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -224,7 +224,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -236,7 +236,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
imul %ebx, %edx
add %edx, %ebx
cmp %edx, %eax
- cmovae( %edx, %ebx)
+ cmovnc( %edx, %ebx)
shr %cl, %ebx
mov %ebx, 24(%ebp) C store B5modb
diff --git a/mpn/x86/pentium4/sse2/mod_1_1.asm b/mpn/x86/pentium4/sse2/mod_1_1.asm
index bd41598aa..1c1a64687 100644
--- a/mpn/x86/pentium4/sse2/mod_1_1.asm
+++ b/mpn/x86/pentium4/sse2/mod_1_1.asm
@@ -143,7 +143,7 @@ C CAUTION: This is the same code as in k7/mod_1_1.asm
imul %ebp, %edx
add %edx, %ebp
cmp %edx, %eax
- cmovb( %ebp, %edx)
+ cmovc( %ebp, %edx)
shr %cl, %ebx
mov %ebx, 8(%esi) C store B1modb
shr %cl, %edx
diff --git a/mpn/x86/pentium4/sse2/mod_1_4.asm b/mpn/x86/pentium4/sse2/mod_1_4.asm
index b62c7cc3f..edf0affd4 100644
--- a/mpn/x86/pentium4/sse2/mod_1_4.asm
+++ b/mpn/x86/pentium4/sse2/mod_1_4.asm
@@ -209,7 +209,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -221,7 +221,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -233,7 +233,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
lea (%edx,%ebx), %esi
cmp %edx, %eax
- cmovae( %edx, %esi)
+ cmovnc( %edx, %esi)
mov %edi, %eax
mul %esi
@@ -245,7 +245,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm
imul %ebx, %edx
add %edx, %ebx
cmp %edx, %eax
- cmovae( %edx, %ebx)
+ cmovnc( %edx, %ebx)
shr %cl, %ebx
mov %ebx, 24(%ebp) C store B5modb
diff --git a/mpn/x86_64/core2/divrem_1.asm b/mpn/x86_64/core2/divrem_1.asm
index 9223836f0..05822fdda 100644
--- a/mpn/x86_64/core2/divrem_1.asm
+++ b/mpn/x86_64/core2/divrem_1.asm
@@ -169,7 +169,7 @@ L(end): lea 1(%rax), %r11
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
jae L(efx)
@@ -197,7 +197,7 @@ L(ftop):mul dinv C 0,12 0,17 0,17
mov d, %rax C
add %rdx, %rax C 10 14 14
cmp %r11, %rdx C 10 14 14
- cmovb %rdx, %rax C 11 15 15
+ cmovc %rdx, %rax C 11 15 15
adc $-1, %r13 C
mov %r13, (qp) C
sub $8, qp C
diff --git a/mpn/x86_64/divrem_1.asm b/mpn/x86_64/divrem_1.asm
index acfa9e91f..d55501944 100644
--- a/mpn/x86_64/divrem_1.asm
+++ b/mpn/x86_64/divrem_1.asm
@@ -109,7 +109,7 @@ L(normalized):
dec un
mov %rbp, %rax
sub d, %rbp
- cmovb %rax, %rbp
+ cmovc %rax, %rbp
sbb R32(%rax), R32(%rax)
inc R32(%rax)
mov %rax, (qp)
@@ -141,7 +141,7 @@ L(ntop): C K8-K10 P6-CNR P6-NHM P4
mov d, %rax C
add %r10, %rax C 11 17 15 34
cmp %rbp, %r10 C 11 17 15 34
- cmovb %r10, %rax C 12 18 16 35
+ cmovc %r10, %rax C 12 18 16 35
adc $-1, %r13 C
cmp d, %rax C
jae L(nfx) C
@@ -216,7 +216,7 @@ L(utop):mov (up,un,8), %r10
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
jae L(ufx)
@@ -238,7 +238,7 @@ L(uend):shl R8(%rcx), %rbp
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
jae L(efx)
@@ -266,7 +266,7 @@ L(ftop):mul dinv C 0,12 0,17 0,17
mov d, %rax C
add %rdx, %rax C 10 14 14
cmp %r11, %rdx C 10 14 14
- cmovb %rdx, %rax C 11 15 15
+ cmovc %rdx, %rax C 11 15 15
adc $-1, %r13 C
mov %r13, (qp) C
sub $8, qp C
diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm
index 82592dc0f..fa50cece4 100644
--- a/mpn/x86_64/mod_1_1.asm
+++ b/mpn/x86_64/mod_1_1.asm
@@ -116,11 +116,11 @@ L(4):
mov %rsi, %rax
lea (%rsi,%rbp), %rdx
cmp %rsi, %rbx
- cmovb %rdx, %rax
+ cmovc %rdx, %rax
mov %rax, %rdx
sub %rbp, %rdx
cmp %rbp, %rax
- cmovae %rdx, %rax
+ cmovnc %rdx, %rax
mov R32(%rdi), R32(%rcx)
shr R8(%rcx), %rax
pop %rbx
@@ -166,7 +166,7 @@ L(z): mul %r8
imul %r12, %rdx
add %rdx, %r12
cmp %rdx, %rax
- cmovb %r12, %rdx
+ cmovc %r12, %rdx
shr R8(%rcx), %r8
shr R8(%rcx), %rdx
mov %r8, 16(%rbx) C store B1modb
diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm
index c7102e877..4533cbe45 100644
--- a/mpn/x86_64/mod_1_2.asm
+++ b/mpn/x86_64/mod_1_2.asm
@@ -133,10 +133,10 @@ L(1): xor R32(%rcx), R32(%rcx)
sub %rdx, %r8
lea (%r8,%r14), %rax
cmp %r8, %rsi
- cmovb %rax, %r8
+ cmovc %rax, %r8
mov %r8, %rax
sub %r14, %rax
- cmovb %r8, %rax
+ cmovc %r8, %rax
mov R32(%rdi), R32(%rcx)
shr R8(%rcx), %rax
pop %rbx
@@ -193,7 +193,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -205,7 +205,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
add %rdx, %r12
cmp %rdx, %rax
- cmovae %rdx, %r12
+ cmovnc %rdx, %r12
shr R8(%rcx), %r12
mov %r12, 32(%rbx) C store B3modb
diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm
index 2d03759b1..d99080d7f 100644
--- a/mpn/x86_64/mod_1_4.asm
+++ b/mpn/x86_64/mod_1_4.asm
@@ -145,10 +145,10 @@ L(end): mov 8(%r14), R32(%rsi)
sub %rdx, %r8
lea (%r8,%rbx), %rax
cmp %r8, %r9
- cmovb %rax, %r8
+ cmovc %rax, %r8
mov %r8, %rax
sub %rbx, %rax
- cmovb %r8, %rax
+ cmovc %r8, %rax
shr R8(%rcx), %rax
pop %rbx
pop %rbp
@@ -200,7 +200,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -212,7 +212,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -224,7 +224,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- cmovae %rdx, %rsi
+ cmovnc %rdx, %rsi
mov %r11, %rax
mul %rsi
@@ -236,7 +236,7 @@ ifdef(`SHLD_SLOW',`
imul %r12, %rdx
add %rdx, %r12
cmp %rdx, %rax
- cmovae %rdx, %r12
+ cmovnc %rdx, %r12
shr R8(%rcx), %r12
mov %r12, 48(%rbx) C store B5modb