From 68afbfbde8fb3e1bc9bb31d53ce5d81f438262a1 Mon Sep 17 00:00:00 2001 From: Torbjorn Granlund Date: Mon, 28 Nov 2011 23:13:37 +0100 Subject: Support ABI DOS64. --- mpn/x86_64/aorsmul_1.asm | 51 ++++++++++++++++++++++++++++++----------- mpn/x86_64/mul_1.asm | 55 ++++++++++++++++++++++++++++++++++----------- mpn/x86_64/mul_basecase.asm | 14 ++++++++++++ mpn/x86_64/sqr_basecase.asm | 17 +++++++++++++- 4 files changed, 110 insertions(+), 27 deletions(-) diff --git a/mpn/x86_64/aorsmul_1.asm b/mpn/x86_64/aorsmul_1.asm index 9c64d56fc..a406bc9e8 100644 --- a/mpn/x86_64/aorsmul_1.asm +++ b/mpn/x86_64/aorsmul_1.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_addmul_1 and mpn_submul_1. -dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc. +dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -28,20 +28,27 @@ C Intel corei ? C Intel atom 21.3 C VIA nano 5.5 -C The inner loop of this code is the result of running a code generation and +C The loop of this code is the result of running a code generation and C optimization tool suite written by David Harvey and Torbjorn Granlund. -C TODO: -C * The inner loop is great, but the prologue and epilogue code was -C quickly written. Tune it! +C TODO +C * The loop is great, but the prologue and epilogue code was quickly written. +C Tune it! -C INPUT PARAMETERS -define(`rp', `%rdi') -define(`up', `%rsi') -define(`n_param',`%rdx') -define(`vl', `%rcx') +define(`rp', `%rdi') C rcx +define(`up', `%rsi') C rdx +define(`n_param', `%rdx') C r8 +define(`vl', `%rcx') C r9 -define(`n', `%r11') +define(`n', `%r11') + +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') ifdef(`OPERATION_addmul_1',` define(`ADDSUB', `add') @@ -52,17 +59,33 @@ ifdef(`OPERATION_submul_1',` define(`func', `mpn_submul_1') ') +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1) +IFDOS(` define(`up', ``%rsi'') ') dnl +IFDOS(` define(`rp', ``%rcx'') ') dnl +IFDOS(` define(`vl', ``%r9'') ') dnl +IFDOS(` define(`r9', ``rdi'') ') dnl +IFDOS(` define(`n', ``%r8'') ') dnl +IFDOS(` define(`r8', ``r11'') ') dnl + ASM_START() TEXT ALIGN(16) PROLOGUE(func) + +IFDOS(``push %rsi '') +IFDOS(``push %rdi '') +IFDOS(``mov %rdx, %rsi '') + mov (up), %rax C read first u limb early push %rbx - mov n_param, %rbx C move away n from rdx, mul uses it +IFELF(` mov n_param, %rbx ') C move away n from rdx, mul uses it +IFDOS(` mov n, %rbx ') mul vl - mov %rbx, n +IFELF(` mov %rbx, n ') and $3, R32(%rbx) jz L(b0) @@ -145,5 +168,7 @@ L(ret): adc $0, %rdx mov %rdx, %rax pop %rbx +IFDOS(``pop %rdi '') +IFDOS(``pop %rsi '') ret EPILOGUE() diff --git a/mpn/x86_64/mul_1.asm b/mpn/x86_64/mul_1.asm index 5f8dc4c9c..3b87bbf01 100644 --- a/mpn/x86_64/mul_1.asm +++ b/mpn/x86_64/mul_1.asm @@ -28,38 +28,65 @@ C Intel corei 3.8 C Intel atom 19.8 C VIA nano ? -C The inner loop of this code is the result of running a code generation and +C The loop of this code is the result of running a code generation and C optimization tool suite written by David Harvey and Torbjorn Granlund. -C TODO: -C * The inner loop is great, but the prologue and epilogue code was -C quickly written. Tune it! +C TODO +C * The loop is great, but the prologue and epilogue code was quickly written. +C Tune it! -C INPUT PARAMETERS -define(`rp', `%rdi') -define(`up', `%rsi') -define(`n_param',`%rdx') -define(`vl', `%rcx') +define(`rp', `%rdi') C rcx +define(`up', `%rsi') C rdx +define(`n_param', `%rdx') C r8 +define(`vl', `%rcx') C r9 -define(`n', `%r11') +define(`n', `%r11') + +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + +IFDOS(` define(`up', ``%rsi'') ') dnl +IFDOS(` define(`rp', ``%rcx'') ') dnl +IFDOS(` define(`vl', ``%r9'') ') dnl +IFDOS(` define(`r9', ``rdi'') ') dnl +IFDOS(` define(`n', ``%r8'') ') dnl +IFDOS(` define(`r8', ``r11'') ') dnl ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_mul_1c) +IFDOS(``push %rsi '') +IFDOS(``push %rdi '') +IFDOS(``mov %rdx, %rsi '') push %rbx - mov %r8, %r10 +IFELF(` mov %r8, %r10') +IFDOS(` mov 64(%rsp), %r10') C 40 + 3*8 (3 push insns) jmp L(common) EPILOGUE() PROLOGUE(mpn_mul_1) + +IFDOS(``push %rsi '') +IFDOS(``push %rdi '') +IFDOS(``mov %rdx, %rsi '') + push %rbx xor %r10, %r10 L(common): mov (up), %rax C read first u limb early - mov n_param, %rbx C move away n from rdx, mul uses it +IFELF(` mov n_param, %rbx ') C move away n from rdx, mul uses it +IFDOS(` mov n, %rbx ') mul vl - mov %rbx, %r11 +IFELF(` mov %rbx, n ') add %r10, %rax adc $0, %rdx @@ -145,5 +172,7 @@ L(L2): mul vl L(ret): mov %rdx, %rax pop %rbx +IFDOS(``pop %rdi '') +IFDOS(``pop %rsi '') ret EPILOGUE() diff --git a/mpn/x86_64/mul_basecase.asm b/mpn/x86_64/mul_basecase.asm index fdba9a6e3..5fede9234 100644 --- a/mpn/x86_64/mul_basecase.asm +++ b/mpn/x86_64/mul_basecase.asm @@ -59,10 +59,23 @@ define(`n', `%r11') define(`outer_addr', `%r14') define(`un', `%r13') +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) + ASM_START() TEXT ALIGN(16) PROLOGUE(mpn_mul_basecase) + DOS64_ENTRY(4) +IFDOS(` mov 56(%rsp), %r8d ') push %rbx push %rbp push %r12 @@ -448,6 +461,7 @@ L(ret): pop %r15 pop %r12 pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() diff --git a/mpn/x86_64/sqr_basecase.asm b/mpn/x86_64/sqr_basecase.asm index 311daab8a..f71627ab9 100644 --- a/mpn/x86_64/sqr_basecase.asm +++ b/mpn/x86_64/sqr_basecase.asm @@ -75,12 +75,22 @@ define(`w1', `%rcx') define(`w2', `%rbp') define(`w3', `%r10') +ifdef(`HOST_DOS64',` + define(`IFDOS', `$1') + define(`IFELF', `') +',` + define(`IFDOS', `') + define(`IFELF', `$1') +') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(ELF64) ASM_START() TEXT ALIGN(16) - PROLOGUE(mpn_sqr_basecase) + DOS64_ENTRY(3) add $-40, %rsp mov %rbx, 32(%rsp) mov %rbp, 24(%rsp) @@ -115,6 +125,7 @@ L(1): mov (up), %rax mov %rdx, 8(rp) add $32, %rsp pop %rbx + DOS64_EXIT() ret L(2): mov (up), %rax @@ -139,6 +150,7 @@ L(2): mov (up), %rax mov %r11, 24(rp) add $32, %rsp pop %rbx + DOS64_EXIT() ret L(3): mov (up), %rax @@ -184,6 +196,7 @@ L(3): mov (up), %rax adc %rbx, 40(rp) add $32, %rsp pop %rbx + DOS64_EXIT() ret L(4): mov (up), %rax @@ -256,6 +269,7 @@ L(4): mov (up), %rax pop %r12 pop %rbp pop %rbx + DOS64_EXIT() ret @@ -780,5 +794,6 @@ L(d1): mov %r11, 24(rp,j,8) pop %r12 pop %rbp pop %rbx + DOS64_EXIT() ret EPILOGUE() -- cgit v1.2.1