diff options
author | Torbjorn Granlund <tege@gmplib.org> | 2010-12-19 15:56:09 +0100 |
---|---|---|
committer | Torbjorn Granlund <tege@gmplib.org> | 2010-12-19 15:56:09 +0100 |
commit | 14366960d27e4942374ce82296bd4d2192580066 (patch) | |
tree | 245854e94ffe2fd211faefed7b041170fd2cec4e | |
parent | da16f25e67722a20c21cfd8132e398780264953c (diff) | |
download | gmp-14366960d27e4942374ce82296bd4d2192580066.tar.gz |
Canonicalise cmov forms.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | mpn/x86/k7/mod_1_1.asm | 6 | ||||
-rw-r--r-- | mpn/x86/k7/mod_1_4.asm | 12 | ||||
-rw-r--r-- | mpn/x86/pentium4/sse2/mod_1_1.asm | 2 | ||||
-rw-r--r-- | mpn/x86/pentium4/sse2/mod_1_4.asm | 8 | ||||
-rw-r--r-- | mpn/x86_64/core2/divrem_1.asm | 4 | ||||
-rw-r--r-- | mpn/x86_64/divrem_1.asm | 10 | ||||
-rw-r--r-- | mpn/x86_64/mod_1_1.asm | 6 | ||||
-rw-r--r-- | mpn/x86_64/mod_1_2.asm | 8 | ||||
-rw-r--r-- | mpn/x86_64/mod_1_4.asm | 12 |
10 files changed, 44 insertions, 34 deletions
@@ -1,5 +1,15 @@ 2010-12-19 Torbjorn Granlund <tege@gmplib.org> + * mpn/x86/k7/mod_1_1.asm: Canonicalise cmov forms. + * mpn/x86/k7/mod_1_4.asm: Likewise. + * mpn/x86/pentium4/sse2/mod_1_1.asm: Likewise. + * mpn/x86/pentium4/sse2/mod_1_4.asm: Likewise. + * mpn/x86_64/core2/divrem_1.asm: Likewise. + * mpn/x86_64/divrem_1.asm: Likewise. + * mpn/x86_64/mod_1_1.asm: Likewise. + * mpn/x86_64/mod_1_2.asm: Likewise. + * mpn/x86_64/mod_1_4.asm: Likewise. + * mpn/x86/k7/gcd_1.asm: Rewrite. Remove slow 'div' loop. Call mpn_mod_1 for operands with more than BMOD_1_TO_MOD_1_THRESHOLD limbs. Misc cleanups. diff --git a/mpn/x86/k7/mod_1_1.asm b/mpn/x86/k7/mod_1_1.asm index f7a6706bd..648bcf939 100644 --- a/mpn/x86/k7/mod_1_1.asm +++ b/mpn/x86/k7/mod_1_1.asm @@ -101,10 +101,10 @@ L(nrm): lea 1(%eax), %esi sub %edx, %eax lea (%eax,%ebp), %edx cmp %eax, %ebx - cmovb( %edx, %eax) + cmovc( %edx, %eax) mov %eax, %edx sub %ebp, %eax - cmovb( %edx, %eax) + cmovc( %edx, %eax) pop %ebx pop %esi pop %edi @@ -140,7 +140,7 @@ C CAUTION: This is the same code as in pentium4/sse2//mod_1_1.asm imul %ebp, %edx add %edx, %ebp cmp %edx, %eax - cmovb( %ebp, %edx) + cmovc( %ebp, %edx) shr %cl, %ebx mov %ebx, 8(%esi) C store B1modb shr %cl, %edx diff --git a/mpn/x86/k7/mod_1_4.asm b/mpn/x86/k7/mod_1_4.asm index 4ff450112..831482f42 100644 --- a/mpn/x86/k7/mod_1_4.asm +++ b/mpn/x86/k7/mod_1_4.asm @@ -153,10 +153,10 @@ L(end): mov 4(%esp), %eax sub %edx, %eax lea (%eax,%ebp), %edx cmp %eax, %ebx - cmovb( %edx, %eax) + cmovc( %edx, %eax) mov %eax, %edx sub %ebp, %eax - cmovb( %edx, %eax) + cmovc( %edx, %eax) add $28, %esp pop %ebx pop %esi @@ -200,7 +200,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm imul %ebx, %edx lea (%edx,%ebx), %esi cmp %edx, %eax - cmovae( %edx, %esi) + cmovnc( %edx, %esi) mov %edi, %eax mul %esi @@ -212,7 +212,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm imul %ebx, %edx lea (%edx,%ebx), %esi cmp %edx, %eax - cmovae( %edx, %esi) + 
cmovnc( %edx, %esi) mov %edi, %eax mul %esi @@ -224,7 +224,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm imul %ebx, %edx lea (%edx,%ebx), %esi cmp %edx, %eax - cmovae( %edx, %esi) + cmovnc( %edx, %esi) mov %edi, %eax mul %esi @@ -236,7 +236,7 @@ C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm imul %ebx, %edx add %edx, %ebx cmp %edx, %eax - cmovae( %edx, %ebx) + cmovnc( %edx, %ebx) shr %cl, %ebx mov %ebx, 24(%ebp) C store B5modb diff --git a/mpn/x86/pentium4/sse2/mod_1_1.asm b/mpn/x86/pentium4/sse2/mod_1_1.asm index bd41598aa..1c1a64687 100644 --- a/mpn/x86/pentium4/sse2/mod_1_1.asm +++ b/mpn/x86/pentium4/sse2/mod_1_1.asm @@ -143,7 +143,7 @@ C CAUTION: This is the same code as in k7/mod_1_1.asm imul %ebp, %edx add %edx, %ebp cmp %edx, %eax - cmovb( %ebp, %edx) + cmovc( %ebp, %edx) shr %cl, %ebx mov %ebx, 8(%esi) C store B1modb shr %cl, %edx diff --git a/mpn/x86/pentium4/sse2/mod_1_4.asm b/mpn/x86/pentium4/sse2/mod_1_4.asm index b62c7cc3f..edf0affd4 100644 --- a/mpn/x86/pentium4/sse2/mod_1_4.asm +++ b/mpn/x86/pentium4/sse2/mod_1_4.asm @@ -209,7 +209,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm imul %ebx, %edx lea (%edx,%ebx), %esi cmp %edx, %eax - cmovae( %edx, %esi) + cmovnc( %edx, %esi) mov %edi, %eax mul %esi @@ -221,7 +221,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm imul %ebx, %edx lea (%edx,%ebx), %esi cmp %edx, %eax - cmovae( %edx, %esi) + cmovnc( %edx, %esi) mov %edi, %eax mul %esi @@ -233,7 +233,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm imul %ebx, %edx lea (%edx,%ebx), %esi cmp %edx, %eax - cmovae( %edx, %esi) + cmovnc( %edx, %esi) mov %edi, %eax mul %esi @@ -245,7 +245,7 @@ C CAUTION: This is the same code as in k7/mod_1_4.asm imul %ebx, %edx add %edx, %ebx cmp %edx, %eax - cmovae( %edx, %ebx) + cmovnc( %edx, %ebx) shr %cl, %ebx mov %ebx, 24(%ebp) C store B5modb diff --git a/mpn/x86_64/core2/divrem_1.asm b/mpn/x86_64/core2/divrem_1.asm index 9223836f0..05822fdda 100644 --- 
a/mpn/x86_64/core2/divrem_1.asm +++ b/mpn/x86_64/core2/divrem_1.asm @@ -169,7 +169,7 @@ L(end): lea 1(%rax), %r11 mov d, %rax add %rbp, %rax cmp %r11, %rbp - cmovb %rbp, %rax + cmovc %rbp, %rax adc $-1, %r13 cmp d, %rax jae L(efx) @@ -197,7 +197,7 @@ L(ftop):mul dinv C 0,12 0,17 0,17 mov d, %rax C add %rdx, %rax C 10 14 14 cmp %r11, %rdx C 10 14 14 - cmovb %rdx, %rax C 11 15 15 + cmovc %rdx, %rax C 11 15 15 adc $-1, %r13 C mov %r13, (qp) C sub $8, qp C diff --git a/mpn/x86_64/divrem_1.asm b/mpn/x86_64/divrem_1.asm index acfa9e91f..d55501944 100644 --- a/mpn/x86_64/divrem_1.asm +++ b/mpn/x86_64/divrem_1.asm @@ -109,7 +109,7 @@ L(normalized): dec un mov %rbp, %rax sub d, %rbp - cmovb %rax, %rbp + cmovc %rax, %rbp sbb R32(%rax), R32(%rax) inc R32(%rax) mov %rax, (qp) @@ -141,7 +141,7 @@ L(ntop): C K8-K10 P6-CNR P6-NHM P4 mov d, %rax C add %r10, %rax C 11 17 15 34 cmp %rbp, %r10 C 11 17 15 34 - cmovb %r10, %rax C 12 18 16 35 + cmovc %r10, %rax C 12 18 16 35 adc $-1, %r13 C cmp d, %rax C jae L(nfx) C @@ -216,7 +216,7 @@ L(utop):mov (up,un,8), %r10 mov d, %rax add %rbp, %rax cmp %r11, %rbp - cmovb %rbp, %rax + cmovc %rbp, %rax adc $-1, %r13 cmp d, %rax jae L(ufx) @@ -238,7 +238,7 @@ L(uend):shl R8(%rcx), %rbp mov d, %rax add %rbp, %rax cmp %r11, %rbp - cmovb %rbp, %rax + cmovc %rbp, %rax adc $-1, %r13 cmp d, %rax jae L(efx) @@ -266,7 +266,7 @@ L(ftop):mul dinv C 0,12 0,17 0,17 mov d, %rax C add %rdx, %rax C 10 14 14 cmp %r11, %rdx C 10 14 14 - cmovb %rdx, %rax C 11 15 15 + cmovc %rdx, %rax C 11 15 15 adc $-1, %r13 C mov %r13, (qp) C sub $8, qp C diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm index 82592dc0f..fa50cece4 100644 --- a/mpn/x86_64/mod_1_1.asm +++ b/mpn/x86_64/mod_1_1.asm @@ -116,11 +116,11 @@ L(4): mov %rsi, %rax lea (%rsi,%rbp), %rdx cmp %rsi, %rbx - cmovb %rdx, %rax + cmovc %rdx, %rax mov %rax, %rdx sub %rbp, %rdx cmp %rbp, %rax - cmovae %rdx, %rax + cmovnc %rdx, %rax mov R32(%rdi), R32(%rcx) shr R8(%rcx), %rax pop %rbx @@ -166,7 +166,7 @@ 
L(z): mul %r8 imul %r12, %rdx add %rdx, %r12 cmp %rdx, %rax - cmovb %r12, %rdx + cmovc %r12, %rdx shr R8(%rcx), %r8 shr R8(%rcx), %rdx mov %r8, 16(%rbx) C store B1modb diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm index c7102e877..4533cbe45 100644 --- a/mpn/x86_64/mod_1_2.asm +++ b/mpn/x86_64/mod_1_2.asm @@ -133,10 +133,10 @@ L(1): xor R32(%rcx), R32(%rcx) sub %rdx, %r8 lea (%r8,%r14), %rax cmp %r8, %rsi - cmovb %rax, %r8 + cmovc %rax, %r8 mov %r8, %rax sub %r14, %rax - cmovb %r8, %rax + cmovc %r8, %rax mov R32(%rdi), R32(%rcx) shr R8(%rcx), %rax pop %rbx @@ -193,7 +193,7 @@ ifdef(`SHLD_SLOW',` imul %r12, %rdx lea (%rdx,%r12), %rsi cmp %rdx, %rax - cmovae %rdx, %rsi + cmovnc %rdx, %rsi mov %r11, %rax mul %rsi @@ -205,7 +205,7 @@ ifdef(`SHLD_SLOW',` imul %r12, %rdx add %rdx, %r12 cmp %rdx, %rax - cmovae %rdx, %r12 + cmovnc %rdx, %r12 shr R8(%rcx), %r12 mov %r12, 32(%rbx) C store B3modb diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm index 2d03759b1..d99080d7f 100644 --- a/mpn/x86_64/mod_1_4.asm +++ b/mpn/x86_64/mod_1_4.asm @@ -145,10 +145,10 @@ L(end): mov 8(%r14), R32(%rsi) sub %rdx, %r8 lea (%r8,%rbx), %rax cmp %r8, %r9 - cmovb %rax, %r8 + cmovc %rax, %r8 mov %r8, %rax sub %rbx, %rax - cmovb %r8, %rax + cmovc %r8, %rax shr R8(%rcx), %rax pop %rbx pop %rbp @@ -200,7 +200,7 @@ ifdef(`SHLD_SLOW',` imul %r12, %rdx lea (%rdx,%r12), %rsi cmp %rdx, %rax - cmovae %rdx, %rsi + cmovnc %rdx, %rsi mov %r11, %rax mul %rsi @@ -212,7 +212,7 @@ ifdef(`SHLD_SLOW',` imul %r12, %rdx lea (%rdx,%r12), %rsi cmp %rdx, %rax - cmovae %rdx, %rsi + cmovnc %rdx, %rsi mov %r11, %rax mul %rsi @@ -224,7 +224,7 @@ ifdef(`SHLD_SLOW',` imul %r12, %rdx lea (%rdx,%r12), %rsi cmp %rdx, %rax - cmovae %rdx, %rsi + cmovnc %rdx, %rsi mov %r11, %rax mul %rsi @@ -236,7 +236,7 @@ ifdef(`SHLD_SLOW',` imul %r12, %rdx add %rdx, %r12 cmp %rdx, %rax - cmovae %rdx, %r12 + cmovnc %rdx, %r12 shr R8(%rcx), %r12 mov %r12, 48(%rbx) C store B5modb |