diff options
Diffstat (limited to 'mpn/x86_64/core2/lshiftc.asm')
-rw-r--r-- | mpn/x86_64/core2/lshiftc.asm | 39 |
1 files changed, 23 insertions, 16 deletions
diff --git a/mpn/x86_64/core2/lshiftc.asm b/mpn/x86_64/core2/lshiftc.asm index a19f72297..31a08f7ae 100644 --- a/mpn/x86_64/core2/lshiftc.asm +++ b/mpn/x86_64/core2/lshiftc.asm @@ -1,6 +1,6 @@ dnl x86-64 mpn_lshiftc optimized for "Core 2". -dnl Copyright 2007, 2009 Free Software Foundation, Inc. +dnl Copyright 2007, 2009, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -35,12 +35,16 @@ C INPUT PARAMETERS define(`rp', `%rdi') define(`up', `%rsi') define(`n', `%rdx') -define(`cnt', `%cl') +define(`cnt', `%rcx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_lshiftc) + DOS64_ENTRY(4) lea -8(rp,n,8), rp lea -8(up,n,8), up @@ -51,7 +55,7 @@ L(b00): C n = 4, 8, 12, ... mov (up), %r10 mov -8(up), %r11 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r10, %rax + shld R8(cnt), %r10, %rax mov -16(up), %r8 lea 24(rp), rp sub $4, n @@ -62,7 +66,7 @@ L(nb00):C n = 1, 5, 9, ... jae L(nb01) L(b01): mov (up), %r9 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r9, %rax + shld R8(cnt), %r9, %rax sub $2, n jb L(le1) mov -8(up), %r10 @@ -70,9 +74,10 @@ L(b01): mov (up), %r9 lea -8(up), up lea 16(rp), rp jmp L(01) -L(le1): shl R8(%rcx), %r9 +L(le1): shl R8(cnt), %r9 not %r9 mov %r9, (rp) + DOS64_EXIT() ret L(nb01):C n = 2, 6, 10, ... @@ -80,19 +85,20 @@ L(nb01):C n = 2, 6, 10, ... L(b10): mov (up), %r8 mov -8(up), %r9 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r8, %rax + shld R8(cnt), %r8, %rax sub $3, n jb L(le2) mov -16(up), %r10 lea -16(up), up lea 8(rp), rp jmp L(10) -L(le2): shld R8(%rcx), %r9, %r8 +L(le2): shld R8(cnt), %r9, %r8 not %r8 mov %r8, (rp) - shl R8(%rcx), %r9 + shl R8(cnt), %r9 not %r9 mov %r9, -8(rp) + DOS64_EXIT() ret ALIGN(16) C performance critical! @@ -100,26 +106,26 @@ L(b11): C n = 3, 7, 11, ... mov (up), %r11 mov -8(up), %r8 xor R32(%rax), R32(%rax) - shld R8(%rcx), %r11, %rax + shld R8(cnt), %r11, %rax mov -16(up), %r9 lea -24(up), up sub $4, n jb L(end) ALIGN(16) -L(top): shld R8(%rcx), %r8, %r11 +L(top): shld R8(cnt), %r8, %r11 mov (up), %r10 not %r11 mov %r11, (rp) -L(10): shld R8(%rcx), %r9, %r8 +L(10): shld R8(cnt), %r9, %r8 mov -8(up), %r11 not %r8 mov %r8, -8(rp) -L(01): shld R8(%rcx), %r10, %r9 +L(01): shld R8(cnt), %r10, %r9 mov -16(up), %r8 not %r9 mov %r9, -16(rp) -L(00): shld R8(%rcx), %r11, %r10 +L(00): shld R8(cnt), %r11, %r10 mov -24(up), %r9 not %r10 mov %r10, -24(rp) @@ -128,14 +134,15 @@ L(00): shld R8(%rcx), %r11, %r10 sub $4, n jnc L(top) -L(end): shld R8(%rcx), %r8, %r11 +L(end): shld R8(cnt), %r8, %r11 not %r11 mov %r11, (rp) - shld R8(%rcx), %r9, %r8 + shld R8(cnt), %r9, %r8 not %r8 mov %r8, -8(rp) - shl R8(%rcx), %r9 + shl R8(cnt), %r9 not %r9 mov %r9, -16(rp) + DOS64_EXIT() ret EPILOGUE() |