Diffstat (limited to 'mpn/x86_64/core2/lshift.asm')
 mpn/x86_64/core2/lshift.asm | 39
 1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/mpn/x86_64/core2/lshift.asm b/mpn/x86_64/core2/lshift.asm
index 3b17e8315..2e175de76 100644
--- a/mpn/x86_64/core2/lshift.asm
+++ b/mpn/x86_64/core2/lshift.asm
@@ -1,6 +1,6 @@
dnl x86-64 mpn_lshift optimized for "Core 2".
-dnl Copyright 2007, 2009 Free Software Foundation, Inc.
+dnl Copyright 2007, 2009, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -35,12 +35,16 @@ C INPUT PARAMETERS
define(`rp', `%rdi')
define(`up', `%rsi')
define(`n', `%rdx')
-define(`cnt', `%cl')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(ELF64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_lshift)
+ DOS64_ENTRY(4)
lea -8(rp,n,8), rp
lea -8(up,n,8), up
@@ -51,7 +55,7 @@ L(b00): C n = 4, 8, 12, ...
mov (up), %r10
mov -8(up), %r11
xor R32(%rax), R32(%rax)
- shld R8(%rcx), %r10, %rax
+ shld R8(cnt), %r10, %rax
mov -16(up), %r8
lea 24(rp), rp
sub $4, n
@@ -62,7 +66,7 @@ L(nb00):C n = 1, 5, 9, ...
jae L(nb01)
L(b01): mov (up), %r9
xor R32(%rax), R32(%rax)
- shld R8(%rcx), %r9, %rax
+ shld R8(cnt), %r9, %rax
sub $2, n
jb L(le1)
mov -8(up), %r10
@@ -70,8 +74,9 @@ L(b01): mov (up), %r9
lea -8(up), up
lea 16(rp), rp
jmp L(01)
-L(le1): shl R8(%rcx), %r9
+L(le1): shl R8(cnt), %r9
mov %r9, (rp)
+ DOS64_EXIT()
ret
L(nb01):C n = 2, 6, 10, ...
@@ -79,17 +84,18 @@ L(nb01):C n = 2, 6, 10, ...
L(b10): mov (up), %r8
mov -8(up), %r9
xor R32(%rax), R32(%rax)
- shld R8(%rcx), %r8, %rax
+ shld R8(cnt), %r8, %rax
sub $3, n
jb L(le2)
mov -16(up), %r10
lea -16(up), up
lea 8(rp), rp
jmp L(10)
-L(le2): shld R8(%rcx), %r9, %r8
+L(le2): shld R8(cnt), %r9, %r8
mov %r8, (rp)
- shl R8(%rcx), %r9
+ shl R8(cnt), %r9
mov %r9, -8(rp)
+ DOS64_EXIT()
ret
ALIGN(16) C performance critical!
@@ -97,23 +103,23 @@ L(b11): C n = 3, 7, 11, ...
mov (up), %r11
mov -8(up), %r8
xor R32(%rax), R32(%rax)
- shld R8(%rcx), %r11, %rax
+ shld R8(cnt), %r11, %rax
mov -16(up), %r9
lea -24(up), up
sub $4, n
jb L(end)
ALIGN(16)
-L(top): shld R8(%rcx), %r8, %r11
+L(top): shld R8(cnt), %r8, %r11
mov (up), %r10
mov %r11, (rp)
-L(10): shld R8(%rcx), %r9, %r8
+L(10): shld R8(cnt), %r9, %r8
mov -8(up), %r11
mov %r8, -8(rp)
-L(01): shld R8(%rcx), %r10, %r9
+L(01): shld R8(cnt), %r10, %r9
mov -16(up), %r8
mov %r9, -16(rp)
-L(00): shld R8(%rcx), %r11, %r10
+L(00): shld R8(cnt), %r11, %r10
mov -24(up), %r9
mov %r10, -24(rp)
add $-32, up
@@ -121,11 +127,12 @@ L(00): shld R8(%rcx), %r11, %r10
sub $4, n
jnc L(top)
-L(end): shld R8(%rcx), %r8, %r11
+L(end): shld R8(cnt), %r8, %r11
mov %r11, (rp)
- shld R8(%rcx), %r9, %r8
+ shld R8(cnt), %r9, %r8
mov %r8, -8(rp)
- shl R8(%rcx), %r9
+ shl R8(cnt), %r9
mov %r9, -16(rp)
+ DOS64_EXIT()
ret
EPILOGUE()
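
For reference, mpn_lshift shifts the n-limb operand {up, n} left by cnt bits (1 <= cnt < 64 with the 64-bit limbs used here), stores the low n limbs of the result at {rp, n}, and returns the bits shifted out of the top limb; gmp.h declares it roughly as mp_limb_t mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt). Below is a minimal standalone C sketch of that semantics, not GMP's own code: limb_t and lshift_ref are placeholder names introduced only for this illustration.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t limb_t;   /* stand-in for mp_limb_t on a 64-bit-limb build */

/* Shift {up, n} left by cnt bits (0 < cnt < 64), store the low n limbs
   at {rp, n}, and return the cnt bits shifted out of the top limb.
   Walking from the most significant limb downward mirrors the asm
   (lea -8(up,n,8), up) and allows in-place use with rp >= up.  */
static limb_t
lshift_ref (limb_t *rp, const limb_t *up, long n, unsigned cnt)
{
  limb_t retval = up[n - 1] >> (64 - cnt);
  for (long i = n - 1; i > 0; i--)
    rp[i] = (up[i] << cnt) | (up[i - 1] >> (64 - cnt));
  rp[0] = up[0] << cnt;
  return retval;
}

int
main (void)
{
  limb_t a[3] = { 1, 0xff00000000000000ull, 0xaabbccddeeff1122ull };
  limb_t r[3];
  limb_t out = lshift_ref (r, a, 3, 8);
  printf ("carry-out: %02llx\n", (unsigned long long) out);   /* aa */
  printf ("high->low: %016llx %016llx %016llx\n",
          (unsigned long long) r[2], (unsigned long long) r[1],
          (unsigned long long) r[0]);
  return 0;
}

The per-limb step (up[i] << cnt) | (up[i - 1] >> (64 - cnt)) is what each shld in the loop above computes in one instruction; shld takes its variable count in %cl, which is the byte register that the R8(cnt) expansion selects from the new define(`cnt', `%rcx').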