summary | refs | log | tree | commit | diff
path: root/mpn/x86_64/core2/rshift.asm
diff options
context:
space:
mode:
Diffstat (limited to 'mpn/x86_64/core2/rshift.asm')
-rw-r--r-- mpn/x86_64/core2/rshift.asm 39
1 files changed, 23 insertions, 16 deletions
diff --git a/mpn/x86_64/core2/rshift.asm b/mpn/x86_64/core2/rshift.asm
index 38a77364f..68306881c 100644
--- a/mpn/x86_64/core2/rshift.asm
+++ b/mpn/x86_64/core2/rshift.asm
@@ -1,6 +1,6 @@
dnl x86-64 mpn_rshift optimized for "Core 2".
-dnl Copyright 2007, 2009 Free Software Foundation, Inc.
+dnl Copyright 2007, 2009, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -35,12 +35,16 @@ C INPUT PARAMETERS
define(`rp', `%rdi')
define(`up', `%rsi')
define(`n', `%rdx')
-define(`cnt', `%cl')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(ELF64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_rshift)
+ DOS64_ENTRY(4)
mov R32(%rdx), R32(%rax)
and $3, R32(%rax)
jne L(nb00)
@@ -48,7 +52,7 @@ L(b00): C n = 4, 8, 12, ...
mov (up), %r10
mov 8(up), %r11
xor R32(%rax), R32(%rax)
- shrd R8(%rcx), %r10, %rax
+ shrd R8(cnt), %r10, %rax
mov 16(up), %r8
lea 8(up), up
lea -24(rp), rp
@@ -60,7 +64,7 @@ L(nb00):C n = 1, 5, 9, ...
jae L(nb01)
L(b01): mov (up), %r9
xor R32(%rax), R32(%rax)
- shrd R8(%rcx), %r9, %rax
+ shrd R8(cnt), %r9, %rax
sub $2, n
jb L(le1)
mov 8(up), %r10
@@ -68,8 +72,9 @@ L(b01): mov (up), %r9
lea 16(up), up
lea -16(rp), rp
jmp L(01)
-L(le1): shr R8(%rcx), %r9
+L(le1): shr R8(cnt), %r9
mov %r9, (rp)
+ DOS64_EXIT()
ret
L(nb01):C n = 2, 6, 10, ...
@@ -77,17 +82,18 @@ L(nb01):C n = 2, 6, 10, ...
L(b10): mov (up), %r8
mov 8(up), %r9
xor R32(%rax), R32(%rax)
- shrd R8(%rcx), %r8, %rax
+ shrd R8(cnt), %r8, %rax
sub $3, n
jb L(le2)
mov 16(up), %r10
lea 24(up), up
lea -8(rp), rp
jmp L(10)
-L(le2): shrd R8(%rcx), %r9, %r8
+L(le2): shrd R8(cnt), %r9, %r8
mov %r8, (rp)
- shr R8(%rcx), %r9
+ shr R8(cnt), %r9
mov %r9, 8(rp)
+ DOS64_EXIT()
ret
ALIGN(16)
@@ -95,23 +101,23 @@ L(b11): C n = 3, 7, 11, ...
mov (up), %r11
mov 8(up), %r8
xor R32(%rax), R32(%rax)
- shrd R8(%rcx), %r11, %rax
+ shrd R8(cnt), %r11, %rax
mov 16(up), %r9
lea 32(up), up
sub $4, n
jb L(end)
ALIGN(16)
-L(top): shrd R8(%rcx), %r8, %r11
+L(top): shrd R8(cnt), %r8, %r11
mov -8(up), %r10
mov %r11, (rp)
-L(10): shrd R8(%rcx), %r9, %r8
+L(10): shrd R8(cnt), %r9, %r8
mov (up), %r11
mov %r8, 8(rp)
-L(01): shrd R8(%rcx), %r10, %r9
+L(01): shrd R8(cnt), %r10, %r9
mov 8(up), %r8
mov %r9, 16(rp)
-L(00): shrd R8(%rcx), %r11, %r10
+L(00): shrd R8(cnt), %r11, %r10
mov 16(up), %r9
mov %r10, 24(rp)
add $32, up
@@ -119,11 +125,12 @@ L(00): shrd R8(%rcx), %r11, %r10
sub $4, n
jnc L(top)
-L(end): shrd R8(%rcx), %r8, %r11
+L(end): shrd R8(cnt), %r8, %r11
mov %r11, (rp)
- shrd R8(%rcx), %r9, %r8
+ shrd R8(cnt), %r9, %r8
mov %r8, 8(rp)
- shr R8(%rcx), %r9
+ shr R8(cnt), %r9
mov %r9, 16(rp)
+ DOS64_EXIT()
ret
EPILOGUE()