diff options
Diffstat (limited to 'mpn/x86/k7')
-rw-r--r-- | mpn/x86/k7/addlsh1_n.asm | 6 | +++---
-rw-r--r-- | mpn/x86/k7/invert_limb.asm | 2 | +-
-rw-r--r-- | mpn/x86/k7/sublsh1_n.asm | 8 | ++++----
3 files changed, 8 insertions, 8 deletions
diff --git a/mpn/x86/k7/addlsh1_n.asm b/mpn/x86/k7/addlsh1_n.asm index e5163b676..05df4a740 100644 --- a/mpn/x86/k7/addlsh1_n.asm +++ b/mpn/x86/k7/addlsh1_n.asm @@ -44,14 +44,14 @@ C AMD K8 C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32 C processors. It uses 2*3-way unrolling, for good reasons. Unfortunately, C that means we need an initial magic multiply. -C +C C It is not clear how to do sublsh1_n or rsblsh1_n using the same pattern. We C cannot do rsblsh1_n since we feed carry from the shift blocks to the C add/subtract blocks, which is right for addition but reversed for C subtraction. We could perhaps do sublsh1_n, with some extra move insns, C without losing any time, since we're not issue limited but carry recurrency C latency. -C +C C Breaking carry recurrency might be a good idea. We would then need separate C registers for the shift carry and add/subtract carry, which in turn would C force is to 2*2-way unrolling. @@ -120,7 +120,7 @@ ifdef(`CPU_P6',` L(exact): incl VAR_COUNT jz L(end) - + ALIGN(16) L(top): ifdef(`CPU_P6',` diff --git a/mpn/x86/k7/invert_limb.asm b/mpn/x86/k7/invert_limb.asm index da6f28397..435fa96d0 100644 --- a/mpn/x86/k7/invert_limb.asm +++ b/mpn/x86/k7/invert_limb.asm @@ -60,7 +60,7 @@ ifdef(`DARWIN',` PROLOGUE(mpn_invert_limb) deflit(`FRAME', 0) mov PARAM_DIVISOR, %eax - C Avoid push/pop on k7. + C Avoid push/pop on k7. sub $8, %esp FRAME_subl_esp(8) mov %ebx, (%esp) mov %edi, 4(%esp) diff --git a/mpn/x86/k7/sublsh1_n.asm b/mpn/x86/k7/sublsh1_n.asm index 41993f99a..965348586 100644 --- a/mpn/x86/k7/sublsh1_n.asm +++ b/mpn/x86/k7/sublsh1_n.asm @@ -30,7 +30,7 @@ C cycles/limb C P5 C P6 model 0-8,10-12 C P6 model 9 (Banias) -C P6 model 13 (Dothan) +C P6 model 13 (Dothan) C P4 model 0 (Willamette) C P4 model 1 (?) 
C P4 model 2 (Northwood) @@ -38,12 +38,12 @@ C P4 model 3 (Prescott) C P4 model 4 (Nocona) C Intel Atom 6.75 C AMD K6 -C AMD K7 +C AMD K7 C AMD K8 C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32 C processors. It uses 2*4-way unrolling, for good reasons. -C +C C Breaking carry recurrency might be a good idea. We would then need separate C registers for the shift carry and add/subtract carry, which in turn would C force is to 2*2-way unrolling. @@ -114,7 +114,7 @@ ifdef(`CPU_P6',` adc %ebp, %ebp rcr %edx C restore 1st saved carry bit - + sbb %eax, (rp) sbb %ebx, 4(rp) sbb %ecx, 8(rp) |