summaryrefslogtreecommitdiff
path: root/crypto/bn/asm/x86-bsdi.s
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/x86-bsdi.s')
-rw-r--r--crypto/bn/asm/x86-bsdi.s272
1 files changed, 272 insertions, 0 deletions
diff --git a/crypto/bn/asm/x86-bsdi.s b/crypto/bn/asm/x86-bsdi.s
new file mode 100644
index 000000000..ca6687648
--- /dev/null
+++ b/crypto/bn/asm/x86-bsdi.s
@@ -0,0 +1,272 @@
+ .file "bn_mulw.c"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 4
+.globl _bn_mul_add_word
+_bn_mul_add_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # si c
+ # bp num
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+ movl 24(%esp),%ebx # a => exb
+ movl 32(%esp),%ecx # w => ecx
+ movl 28(%esp),%ebp # num => ebp
+
+ shrl $2,%ebp # num/4
+ je .L910
+
+# .align 4
+.L110:
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl (%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl 4(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl 8(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 4
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+ addl 12(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,12(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ decl %ebp # --num
+ je .L910
+ jmp .L110
+# .align 4
+.L910:
+ movl 28(%esp),%ebp # num => ebp
+ andl $3,%ebp
+ je .L111
+
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl (%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl 4(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl 8(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+# .align 4
+.L111:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe1:
+ .align 4
+.globl _bn_mul_word
+_bn_mul_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # num bp
+ # si c
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+ movl 24(%esp),%ebx # a => exb
+ movl 28(%esp),%ebp # num => bp
+ movl 32(%esp),%ecx # w => ecx
+
+# .align 4
+.L210:
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,12(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ jmp .L210
+# .align 4
+.L211:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe2:
+ .align 4
+.globl _bn_sqr_words
+_bn_sqr_words:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%esi # r
+ movl 20(%esp),%edi # a
+ movl 24(%esp),%ebx # n
+# .align 4
+ shrl $2,%ebx
+ jz .L99
+.L28:
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+ movl 12(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,24(%esi) # put low into return addr
+ movl %edx,28(%esi) # put high into return addr
+
+ addl $16,%edi
+ addl $32,%esi
+ decl %ebx # n-=4;
+ jz .L99
+ jmp .L28
+# .align 4
+.L99:
+ movl 24(%esp),%ebx # n
+ andl $3,%ebx
+ jz .L29
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+.L29:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+.Lfe3:
+ .align 4
+.globl _bn_div64
+_bn_div64:
+ movl 4(%esp),%edx # a
+ movl 8(%esp),%eax # b
+ divl 12(%esp) # ab/c
+ ret
+.Lfe4:
+ .ident "GCC: (GNU) 2.6.3"