summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nikos Mavrogiannopoulos <nmav@gnutls.org> 2011-10-12 20:11:21 +0200
committer Nikos Mavrogiannopoulos <nmav@gnutls.org> 2011-10-12 20:14:00 +0200
commit 7441316a38a03003aea456449d5f809ae57209b1 (patch)
tree 16243d8dcd95ae955c3e7986aab20bdec84e32b0
parent f7013222c2d4e42ea0cc5c8ee27d86e9123a13d8 (diff)
download gnutls-7441316a38a03003aea456449d5f809ae57209b1.tar.gz
new version of padlock by Andy.
-rw-r--r-- lib/accelerated/x86/asm/padlock-x86-64.s 70
-rw-r--r-- lib/accelerated/x86/asm/padlock-x86.s 138
2 files changed, 141 insertions, 67 deletions
diff --git a/lib/accelerated/x86/asm/padlock-x86-64.s b/lib/accelerated/x86/asm/padlock-x86-64.s
index c15da11fe0..ad3ab6de9c 100644
--- a/lib/accelerated/x86/asm/padlock-x86-64.s
+++ b/lib/accelerated/x86/asm/padlock-x86-64.s
@@ -274,6 +274,8 @@ padlock_ecb_encrypt:
leaq 16(%rdx),%rdx
xorl %eax,%eax
xorl %ebx,%ebx
+ cmpq $128,%rcx
+ jbe .Lecb_short
testl $32,(%rdx)
jnz .Lecb_aligned
testq $15,%rdi
@@ -335,19 +337,34 @@ padlock_ecb_encrypt:
movq $512,%rbx
jnz .Lecb_loop
- testq $15,%rdi
- jz .Lecb_done
+ cmpq %rsp,%rbp
+ je .Lecb_done
+
+ pxor %xmm0,%xmm0
+ leaq (%rsp),%rax
+.Lecb_bzero:
+ movaps %xmm0,(%rax)
+ leaq 16(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lecb_bzero
- movq %rbp,%rcx
- movq %rsp,%rdi
- subq %rsp,%rcx
- xorq %rax,%rax
- shrq $3,%rcx
-.byte 0xf3,0x48,0xab
.Lecb_done:
leaq (%rbp),%rsp
jmp .Lecb_exit
-
+.align 16
+.Lecb_short:
+ movq %rsp,%rbp
+ subq %rcx,%rsp
+ xorq %rbx,%rbx
+.Lecb_short_copy:
+ movups (%rsi,%rbx,1),%xmm0
+ leaq 16(%rbx),%rbx
+ cmpq %rbx,%rcx
+ movaps %xmm0,-16(%rsp,%rbx,1)
+ ja .Lecb_short_copy
+ movq %rsp,%rsi
+ movq %rcx,%rbx
+ jmp .Lecb_loop
.align 16
.Lecb_aligned:
leaq -16(%rdx),%rax
@@ -381,6 +398,8 @@ padlock_cbc_encrypt:
leaq 16(%rdx),%rdx
xorl %eax,%eax
xorl %ebx,%ebx
+ cmpq $64,%rcx
+ jbe .Lcbc_short
testl $32,(%rdx)
jnz .Lcbc_aligned
testq $15,%rdi
@@ -444,19 +463,34 @@ padlock_cbc_encrypt:
movq $512,%rbx
jnz .Lcbc_loop
- testq $15,%rdi
- jz .Lcbc_done
+ cmpq %rsp,%rbp
+ je .Lcbc_done
+
+ pxor %xmm0,%xmm0
+ leaq (%rsp),%rax
+.Lcbc_bzero:
+ movaps %xmm0,(%rax)
+ leaq 16(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_bzero
- movq %rbp,%rcx
- movq %rsp,%rdi
- subq %rsp,%rcx
- xorq %rax,%rax
- shrq $3,%rcx
-.byte 0xf3,0x48,0xab
.Lcbc_done:
leaq (%rbp),%rsp
jmp .Lcbc_exit
-
+.align 16
+.Lcbc_short:
+ movq %rsp,%rbp
+ subq %rcx,%rsp
+ xorq %rbx,%rbx
+.Lcbc_short_copy:
+ movups (%rsi,%rbx,1),%xmm0
+ leaq 16(%rbx),%rbx
+ cmpq %rbx,%rcx
+ movaps %xmm0,-16(%rsp,%rbx,1)
+ ja .Lcbc_short_copy
+ movq %rsp,%rsi
+ movq %rcx,%rbx
+ jmp .Lcbc_loop
.align 16
.Lcbc_aligned:
leaq -16(%rdx),%rax
diff --git a/lib/accelerated/x86/asm/padlock-x86.s b/lib/accelerated/x86/asm/padlock-x86.s
index 06035d0f92..9b3675e8fa 100644
--- a/lib/accelerated/x86/asm/padlock-x86.s
+++ b/lib/accelerated/x86/asm/padlock-x86.s
@@ -185,14 +185,16 @@ padlock_ecb_encrypt:
leal 16(%edx),%edx
xorl %eax,%eax
xorl %ebx,%ebx
+ cmpl $128,%ecx
+ jbe .L006ecb_short
testl $32,(%edx)
- jnz .L006ecb_aligned
+ jnz .L007ecb_aligned
testl $15,%edi
setz %al
testl $15,%esi
setz %bl
testl %ebx,%eax
- jnz .L006ecb_aligned
+ jnz .L007ecb_aligned
negl %eax
movl $512,%ebx
notl %eax
@@ -205,9 +207,9 @@ padlock_ecb_encrypt:
andl $511,%ebx
leal (%eax,%ebp,1),%esp
andl $-16,%esp
- jmp .L007ecb_loop
+ jmp .L008ecb_loop
.align 16
-.L007ecb_loop:
+.L008ecb_loop:
movl %edi,(%ebp)
movl %esi,4(%ebp)
movl %ecx,8(%ebp)
@@ -216,13 +218,13 @@ padlock_ecb_encrypt:
testl $15,%edi
cmovnzl %esp,%edi
testl $15,%esi
- jz .L008ecb_inp_aligned
+ jz .L009ecb_inp_aligned
shrl $2,%ecx
.byte 243,165
subl %ebx,%edi
movl %ebx,%ecx
movl %edi,%esi
-.L008ecb_inp_aligned:
+.L009ecb_inp_aligned:
leal -16(%edx),%eax
leal 16(%edx),%ebx
shrl $4,%ecx
@@ -230,38 +232,56 @@ padlock_ecb_encrypt:
movl (%ebp),%edi
movl 12(%ebp),%ebx
testl $15,%edi
- jz .L009ecb_out_aligned
+ jz .L010ecb_out_aligned
movl %ebx,%ecx
shrl $2,%ecx
leal (%esp),%esi
.byte 243,165
subl %ebx,%edi
-.L009ecb_out_aligned:
+.L010ecb_out_aligned:
movl 4(%ebp),%esi
movl 8(%ebp),%ecx
addl %ebx,%edi
addl %ebx,%esi
subl %ebx,%ecx
movl $512,%ebx
- jnz .L007ecb_loop
- testl $15,%edi
- jz .L010ecb_done
- movl %ebp,%ecx
- movl %esp,%edi
- subl %esp,%ecx
- xorl %eax,%eax
- shrl $2,%ecx
-.byte 243,171
-.L010ecb_done:
+ jnz .L008ecb_loop
+ cmpl %ebp,%esp
+ je .L011ecb_done
+ pxor %xmm0,%xmm0
+ leal (%esp),%eax
+.L012ecb_bzero:
+ movaps %xmm0,(%eax)
+ leal 16(%eax),%eax
+ cmpl %eax,%ebp
+ ja .L012ecb_bzero
+.L011ecb_done:
leal 24(%ebp),%esp
- jmp .L011ecb_exit
+ jmp .L013ecb_exit
+.align 16
+.L006ecb_short:
+ xorl %eax,%eax
+ leal -24(%esp),%ebp
+ subl %ecx,%eax
+ leal (%eax,%ebp,1),%esp
+ andl $-16,%esp
+ xorl %ebx,%ebx
+.L014ecb_short_copy:
+ movups (%esi,%ebx,1),%xmm0
+ leal 16(%ebx),%ebx
+ cmpl %ebx,%ecx
+ movaps %xmm0,-16(%esp,%ebx,1)
+ ja .L014ecb_short_copy
+ movl %esp,%esi
+ movl %ecx,%ebx
+ jmp .L008ecb_loop
.align 16
-.L006ecb_aligned:
+.L007ecb_aligned:
leal -16(%edx),%eax
leal 16(%edx),%ebx
shrl $4,%ecx
.byte 243,15,167,200
-.L011ecb_exit:
+.L013ecb_exit:
movl $1,%eax
leal 4(%esp),%esp
.L004ecb_abort:
@@ -285,25 +305,27 @@ padlock_cbc_encrypt:
movl 28(%esp),%edx
movl 32(%esp),%ecx
testl $15,%edx
- jnz .L012cbc_abort
+ jnz .L015cbc_abort
testl $15,%ecx
- jnz .L012cbc_abort
- leal .Lpadlock_saved_context-.L013cbc_pic_point,%eax
+ jnz .L015cbc_abort
+ leal .Lpadlock_saved_context-.L016cbc_pic_point,%eax
pushfl
cld
call _padlock_verify_ctx
-.L013cbc_pic_point:
+.L016cbc_pic_point:
leal 16(%edx),%edx
xorl %eax,%eax
xorl %ebx,%ebx
+ cmpl $64,%ecx
+ jbe .L017cbc_short
testl $32,(%edx)
- jnz .L014cbc_aligned
+ jnz .L018cbc_aligned
testl $15,%edi
setz %al
testl $15,%esi
setz %bl
testl %ebx,%eax
- jnz .L014cbc_aligned
+ jnz .L018cbc_aligned
negl %eax
movl $512,%ebx
notl %eax
@@ -316,9 +338,9 @@ padlock_cbc_encrypt:
andl $511,%ebx
leal (%eax,%ebp,1),%esp
andl $-16,%esp
- jmp .L015cbc_loop
+ jmp .L019cbc_loop
.align 16
-.L015cbc_loop:
+.L019cbc_loop:
movl %edi,(%ebp)
movl %esi,4(%ebp)
movl %ecx,8(%ebp)
@@ -327,13 +349,13 @@ padlock_cbc_encrypt:
testl $15,%edi
cmovnzl %esp,%edi
testl $15,%esi
- jz .L016cbc_inp_aligned
+ jz .L020cbc_inp_aligned
shrl $2,%ecx
.byte 243,165
subl %ebx,%edi
movl %ebx,%ecx
movl %edi,%esi
-.L016cbc_inp_aligned:
+.L020cbc_inp_aligned:
leal -16(%edx),%eax
leal 16(%edx),%ebx
shrl $4,%ecx
@@ -343,43 +365,61 @@ padlock_cbc_encrypt:
movl (%ebp),%edi
movl 12(%ebp),%ebx
testl $15,%edi
- jz .L017cbc_out_aligned
+ jz .L021cbc_out_aligned
movl %ebx,%ecx
shrl $2,%ecx
leal (%esp),%esi
.byte 243,165
subl %ebx,%edi
-.L017cbc_out_aligned:
+.L021cbc_out_aligned:
movl 4(%ebp),%esi
movl 8(%ebp),%ecx
addl %ebx,%edi
addl %ebx,%esi
subl %ebx,%ecx
movl $512,%ebx
- jnz .L015cbc_loop
- testl $15,%edi
- jz .L018cbc_done
- movl %ebp,%ecx
- movl %esp,%edi
- subl %esp,%ecx
- xorl %eax,%eax
- shrl $2,%ecx
-.byte 243,171
-.L018cbc_done:
+ jnz .L019cbc_loop
+ cmpl %ebp,%esp
+ je .L022cbc_done
+ pxor %xmm0,%xmm0
+ leal (%esp),%eax
+.L023cbc_bzero:
+ movaps %xmm0,(%eax)
+ leal 16(%eax),%eax
+ cmpl %eax,%ebp
+ ja .L023cbc_bzero
+.L022cbc_done:
leal 24(%ebp),%esp
- jmp .L019cbc_exit
+ jmp .L024cbc_exit
+.align 16
+.L017cbc_short:
+ xorl %eax,%eax
+ leal -24(%esp),%ebp
+ subl %ecx,%eax
+ leal (%eax,%ebp,1),%esp
+ andl $-16,%esp
+ xorl %ebx,%ebx
+.L025cbc_short_copy:
+ movups (%esi,%ebx,1),%xmm0
+ leal 16(%ebx),%ebx
+ cmpl %ebx,%ecx
+ movaps %xmm0,-16(%esp,%ebx,1)
+ ja .L025cbc_short_copy
+ movl %esp,%esi
+ movl %ecx,%ebx
+ jmp .L019cbc_loop
.align 16
-.L014cbc_aligned:
+.L018cbc_aligned:
leal -16(%edx),%eax
leal 16(%edx),%ebx
shrl $4,%ecx
.byte 243,15,167,208
movaps (%eax),%xmm0
movaps %xmm0,-16(%edx)
-.L019cbc_exit:
+.L024cbc_exit:
movl $1,%eax
leal 4(%esp),%esp
-.L012cbc_abort:
+.L015cbc_abort:
popl %edi
popl %esi
popl %ebx
@@ -405,10 +445,10 @@ _win32_segv_handler:
movl 4(%esp),%edx
movl 12(%esp),%ecx
cmpl $3221225477,(%edx)
- jne .L020ret
+ jne .L026ret
addl $4,184(%ecx)
movl $0,%eax
-.L020ret:
+.L026ret:
ret
.size _win32_segv_handler,.-_win32_segv_handler
.globl padlock_sha1_oneshot