summaryrefslogtreecommitdiff
path: root/mpn/x86
diff options
context:
space:
mode:
Diffstat (limited to 'mpn/x86')
-rw-r--r--mpn/x86/atom/lshift.asm4
-rw-r--r--mpn/x86/atom/sse2/mul_1.asm2
-rw-r--r--mpn/x86/bdiv_dbm1c.asm4
-rw-r--r--mpn/x86/bdiv_q_1.asm2
-rw-r--r--mpn/x86/k7/addlsh1_n.asm6
-rw-r--r--mpn/x86/k7/invert_limb.asm2
-rw-r--r--mpn/x86/k7/sublsh1_n.asm8
-rw-r--r--mpn/x86/p6/bdiv_q_1.asm4
-rw-r--r--mpn/x86/pentium/bdiv_q_1.asm2
9 files changed, 17 insertions, 17 deletions
diff --git a/mpn/x86/atom/lshift.asm b/mpn/x86/atom/lshift.asm
index d8cb8b505..1005cce59 100644
--- a/mpn/x86/atom/lshift.asm
+++ b/mpn/x86/atom/lshift.asm
@@ -160,7 +160,7 @@ deflit(`FRAME',4)
shr $2, %eax C (size + 3) / 4
and $3, %edx C (size - 1) % 4
jz L(goloop) C jmp if size == 1 (mod 4)
- shr %edx
+ shr %edx
jnc L(odd) C jump if size == 3 (mod 4)
add %ecx, %ecx
@@ -173,7 +173,7 @@ deflit(`FRAME',4)
jnz L(goloop) C jump if size == 0 (mod 4)
L(odd): lea -8(up), up
lea -8(rp), rp
- jmp L(sentry) C reached if size == 2 or 3 (mod 4)
+ jmp L(sentry) C reached if size == 2 or 3 (mod 4)
L(sloop):
adc %ecx, %ecx
diff --git a/mpn/x86/atom/sse2/mul_1.asm b/mpn/x86/atom/sse2/mul_1.asm
index dd9b95366..5cd86caec 100644
--- a/mpn/x86/atom/sse2/mul_1.asm
+++ b/mpn/x86/atom/sse2/mul_1.asm
@@ -62,7 +62,7 @@ EPILOGUE()
PROLOGUE(mpn_mul_1)
pxor %mm6, %mm6
L(ent): push %esi FRAME_pushl()
- mov PARAM_SRC, up
+ mov PARAM_SRC, up
mov PARAM_SIZE, %eax C size
movd PARAM_MUL, %mm7
movd (up), %mm0
diff --git a/mpn/x86/bdiv_dbm1c.asm b/mpn/x86/bdiv_dbm1c.asm
index 201ef173d..ac9faf270 100644
--- a/mpn/x86/bdiv_dbm1c.asm
+++ b/mpn/x86/bdiv_dbm1c.asm
@@ -24,10 +24,10 @@ C P5
C P6 model 0-8,10-12)
C P6 model 9 (Banias)
C P6 model 13 (Dothan) 5.1
-C P4 model 0 (Willamette)
+C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood) 13.67
-C P4 model 3 (Prescott)
+C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
C Intel Atom
C AMD K6
diff --git a/mpn/x86/bdiv_q_1.asm b/mpn/x86/bdiv_q_1.asm
index 2528d01f7..7f344ab57 100644
--- a/mpn/x86/bdiv_q_1.asm
+++ b/mpn/x86/bdiv_q_1.asm
@@ -30,7 +30,7 @@ C K6 14.0
C K7 12.0
C P4 42.0
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
defframe(PARAM_SHIFT, 24)
defframe(PARAM_INVERSE,20)
diff --git a/mpn/x86/k7/addlsh1_n.asm b/mpn/x86/k7/addlsh1_n.asm
index e5163b676..05df4a740 100644
--- a/mpn/x86/k7/addlsh1_n.asm
+++ b/mpn/x86/k7/addlsh1_n.asm
@@ -44,14 +44,14 @@ C AMD K8
C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32
C processors. It uses 2*3-way unrolling, for good reasons. Unfortunately,
C that means we need an initial magic multiply.
-C
+C
C It is not clear how to do sublsh1_n or rsblsh1_n using the same pattern. We
C cannot do rsblsh1_n since we feed carry from the shift blocks to the
C add/subtract blocks, which is right for addition but reversed for
C subtraction. We could perhaps do sublsh1_n, with some extra move insns,
C without losing any time, since we're not issue limited but carry recurrency
C latency.
-C
+C
C Breaking carry recurrency might be a good idea. We would then need separate
C registers for the shift carry and add/subtract carry, which in turn would
C force us to 2*2-way unrolling.
@@ -120,7 +120,7 @@ ifdef(`CPU_P6',`
L(exact):
incl VAR_COUNT
jz L(end)
-
+
ALIGN(16)
L(top):
ifdef(`CPU_P6',`
diff --git a/mpn/x86/k7/invert_limb.asm b/mpn/x86/k7/invert_limb.asm
index da6f28397..435fa96d0 100644
--- a/mpn/x86/k7/invert_limb.asm
+++ b/mpn/x86/k7/invert_limb.asm
@@ -60,7 +60,7 @@ ifdef(`DARWIN',`
PROLOGUE(mpn_invert_limb)
deflit(`FRAME', 0)
mov PARAM_DIVISOR, %eax
- C Avoid push/pop on k7.
+ C Avoid push/pop on k7.
sub $8, %esp FRAME_subl_esp(8)
mov %ebx, (%esp)
mov %edi, 4(%esp)
diff --git a/mpn/x86/k7/sublsh1_n.asm b/mpn/x86/k7/sublsh1_n.asm
index 41993f99a..965348586 100644
--- a/mpn/x86/k7/sublsh1_n.asm
+++ b/mpn/x86/k7/sublsh1_n.asm
@@ -30,7 +30,7 @@ C cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
+C P6 model 13 (Dothan)
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
@@ -38,12 +38,12 @@ C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
C Intel Atom 6.75
C AMD K6
-C AMD K7
+C AMD K7
C AMD K8
C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32
C processors. It uses 2*4-way unrolling, for good reasons.
-C
+C
C Breaking carry recurrency might be a good idea. We would then need separate
C registers for the shift carry and add/subtract carry, which in turn would
C force us to 2*2-way unrolling.
@@ -114,7 +114,7 @@ ifdef(`CPU_P6',`
adc %ebp, %ebp
rcr %edx C restore 1st saved carry bit
-
+
sbb %eax, (rp)
sbb %ebx, 4(rp)
sbb %ecx, 8(rp)
diff --git a/mpn/x86/p6/bdiv_q_1.asm b/mpn/x86/p6/bdiv_q_1.asm
index 3a8733a0d..0ffbc78e4 100644
--- a/mpn/x86/p6/bdiv_q_1.asm
+++ b/mpn/x86/p6/bdiv_q_1.asm
@@ -25,7 +25,7 @@ include(`../config.m4')
C odd even divisor
C P6: 10.0 12.0 cycles/limb
-C MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+C MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
C The odd case is basically the same as mpn_modexact_1_odd, just with an
C extra store, and it runs at the same 10 cycles which is the dependent
@@ -269,7 +269,7 @@ ifdef(`PIC',`
imull %edx, %eax C inv*inv*d
subl %eax, %ebp C inv = 2*inv - inv*inv*d
-
+
jmp L(common)
EPILOGUE()
diff --git a/mpn/x86/pentium/bdiv_q_1.asm b/mpn/x86/pentium/bdiv_q_1.asm
index 965173d1c..7e84fc817 100644
--- a/mpn/x86/pentium/bdiv_q_1.asm
+++ b/mpn/x86/pentium/bdiv_q_1.asm
@@ -27,7 +27,7 @@ C odd even
C P54: 24.5 30.5 cycles/limb
C P55: 23.0 28.0
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
C The P55 speeds noted above, 23 cycles odd or 28 cycles even, are as
C expected. On P54 in the even case the shrdl pairing nonsense (see