diff options
Diffstat (limited to 'mpi/amd64/mpih-sub1.S')
-rw-r--r-- | mpi/amd64/mpih-sub1.S | 80 |
1 file changed, 68 insertions, 12 deletions
diff --git a/mpi/amd64/mpih-sub1.S b/mpi/amd64/mpih-sub1.S
index 8c61cb20..32799c86 100644
--- a/mpi/amd64/mpih-sub1.S
+++ b/mpi/amd64/mpih-sub1.S
@@ -3,6 +3,7 @@
  *
  *      Copyright (C) 1992, 1994, 1995, 1998,
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
+ *      Copyright (C) 2023 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
@@ -44,20 +45,75 @@
 	.globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
 C_SYMBOL_NAME(_gcry_mpih_sub_n:)
 	FUNC_ENTRY()
-	leaq	(%rsi,%rcx,8), %rsi
-	leaq	(%rdi,%rcx,8), %rdi
-	leaq	(%rdx,%rcx,8), %rdx
-	negq	%rcx
-	xorl	%eax, %eax		/* clear cy */
+	movl	%ecx, %r9d
+	andl	$3, %r9d
+	je	.Lprehandle0
+	cmpl	$2, %r9d
+	jb	.Lprehandle1
+	je	.Lprehandle2
+
+#define FIRST_SUB() \
+	movq	(%rsi), %rax; \
+	subq	(%rdx), %rax; \
+	movq	%rax, (%rdi)
+
+#define NEXT_SUB(offset) \
+	movq	offset(%rsi), %rax; \
+	sbbq	offset(%rdx), %rax; \
+	movq	%rax, offset(%rdi)
+
+.Lprehandle3:
+	leaq	-2(%rcx), %rcx
+	FIRST_SUB();
+	NEXT_SUB(8);
+	NEXT_SUB(16);
+	decq	%rcx
+	je	.Lend
+	leaq	24(%rsi), %rsi
+	leaq	24(%rdx), %rdx
+	leaq	24(%rdi), %rdi
+	jmp	.Loop
+
+	ALIGN(3)
+.Lprehandle2:
+	leaq	-1(%rcx), %rcx
+	FIRST_SUB();
+	NEXT_SUB(8);
+	decq	%rcx
+	je	.Lend
+	leaq	16(%rsi), %rsi
+	leaq	16(%rdx), %rdx
+	leaq	16(%rdi), %rdi
+	jmp	.Loop
+
+	ALIGN(3)
+.Lprehandle1:
+	FIRST_SUB();
+	decq	%rcx
+	je	.Lend
+	leaq	8(%rsi), %rsi
+	leaq	8(%rdx), %rdx
+	leaq	8(%rdi), %rdi
+	jmp	.Loop
+
+	ALIGN(3)
+.Lprehandle0:
+	clc				/* clear cy */
 
 	ALIGN(4)			/* minimal alignment for claimed speed */
-.Loop:	movq	(%rsi,%rcx,8), %rax
-	movq	(%rdx,%rcx,8), %r10
-	sbbq	%r10, %rax
-	movq	%rax, (%rdi,%rcx,8)
-	incq	%rcx
+.Loop:	leaq	-3(%rcx), %rcx
+	NEXT_SUB(0);
+	NEXT_SUB(8);
+	NEXT_SUB(16);
+	NEXT_SUB(24);
+	leaq	32(%rsi), %rsi
+	leaq	32(%rdx), %rdx
+	leaq	32(%rdi), %rdi
+	decq	%rcx
 	jne	.Loop
 
-	movq	%rcx, %rax		/* zero %rax */
-	adcq	%rax, %rax
+	ALIGN(2)
+.Lend:
+	movl	$0, %eax		/* zero %rax */
+	adcl	%eax, %eax
 	FUNC_EXIT()