path: root/x64masm.asm
author    weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>  2009-03-02 02:39:17 +0000
committer weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>  2009-03-02 02:39:17 +0000
commit    caf9e032e6b4ccb114a74a3936c916bcfaba262d (patch)
tree      0fecaa7a6728d07549a41864ea2cedfb245f0bd3 /x64masm.asm
parent    4e4793cc591e26c788b53c487bee7cab2d377f5e (diff)
download  cryptopp-caf9e032e6b4ccb114a74a3936c916bcfaba262d.tar.gz
changes for 5.6:
- added AuthenticatedSymmetricCipher interface class and Filter wrappers
- added CCM, GCM (with SSE2 assembly), CMAC, and SEED
- improved AES speed on x86 and x64
- removed WORD64_AVAILABLE; compiler support for 64-bit integers is now required

git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@433 57ff6487-cd31-0410-9ec3-f628ee90f5f0
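The commit message introduces the AuthenticatedSymmetricCipher interface and its Filter wrappers. As a minimal, untested sketch of how the 5.6 API is meant to be used (the names GCM<AES>::Encryption, AuthenticatedEncryptionFilter, and the 12-byte default GCM IV come from the release; the main() scaffolding around them is illustrative):

    #include "aes.h"
    #include "gcm.h"
    #include "filters.h"
    #include "osrng.h"
    #include "secblock.h"
    #include <string>

    int main()
    {
        using namespace CryptoPP;
        AutoSeededRandomPool prng;

        SecByteBlock key(AES::DEFAULT_KEYLENGTH);   // 16-byte AES key
        SecByteBlock iv(12);                        // GCM's default IV length
        prng.GenerateBlock(key, key.size());
        prng.GenerateBlock(iv, iv.size());

        GCM<AES>::Encryption enc;                   // GCM added in this release
        enc.SetKeyWithIV(key, key.size(), iv);

        std::string plain = "attack at dawn", cipher;
        StringSource ss(plain, true,
            new AuthenticatedEncryptionFilter(enc,  // new Filter wrapper
                new StringSink(cipher)));           // ciphertext || 16-byte tag
        return 0;
    }

The filter appends the authentication tag to the ciphertext; AuthenticatedDecryptionFilter verifies it on the decryption path.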
Diffstat (limited to 'x64masm.asm')
-rwxr-xr-x  x64masm.asm  333
1 file changed, 0 insertions(+), 333 deletions(-)
diff --git a/x64masm.asm b/x64masm.asm
index a395c9a..f27c002 100755
--- a/x64masm.asm
+++ b/x64masm.asm
@@ -1,59 +1,6 @@
include ksamd64.inc
EXTERNDEF s_sosemanukMulTables:FAR
-
.CODE
- ALIGN 8
-Baseline_Add PROC
- lea rdx, [rdx+8*rcx]
- lea r8, [r8+8*rcx]
- lea r9, [r9+8*rcx]
- neg rcx ; rcx is negative index
- jz $1@Baseline_Add
- mov rax,[r8+8*rcx]
- add rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
-$0@Baseline_Add:
- mov rax,[r8+8*rcx+8]
- adc rax,[r9+8*rcx+8]
- mov [rdx+8*rcx+8],rax
- lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
- jrcxz $1@Baseline_Add ; loop until rcx counts up to zero
- mov rax,[r8+8*rcx]
- adc rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
- jmp $0@Baseline_Add
-$1@Baseline_Add:
- mov rax, 0 ; mov, not xor, so the carry flag is preserved
- adc rax, rax ; store carry into rax (return result register)
- ret
-Baseline_Add ENDP
-
- ALIGN 8
-Baseline_Sub PROC
- lea rdx, [rdx+8*rcx]
- lea r8, [r8+8*rcx]
- lea r9, [r9+8*rcx]
- neg rcx ; rcx is negative index
- jz $1@Baseline_Sub
- mov rax,[r8+8*rcx]
- sub rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
-$0@Baseline_Sub:
- mov rax,[r8+8*rcx+8]
- sbb rax,[r9+8*rcx+8]
- mov [rdx+8*rcx+8],rax
- lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
- jrcxz $1@Baseline_Sub ; loop until rcx counts up to zero
- mov rax,[r8+8*rcx]
- sbb rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
- jmp $0@Baseline_Sub
-$1@Baseline_Sub:
- mov rax, 0 ; mov, not xor, so the borrow flag is preserved
- adc rax, rax ; store borrow into rax (return result register)
-
- ret
-Baseline_Sub ENDP
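
The two deleted routines above are the word-wise add/sub cores. Under the Microsoft x64 calling convention visible here, rcx is the word count N, rdx the result C, and r8/r9 the operands A/B; the lea/jrcxz pairing is used because neither instruction touches the flags, so the adc/sbb carry chain survives across the loop control. Since each iteration retires two words, N is presumably required to be even. A hedged C++ model of Baseline_Add's contract (Baseline_Sub is the same with borrow propagation):

    #include <cstddef>
    #include <cstdint>

    // Word-wise C = A + B over n 64-bit words, returning the final carry
    // (the asm materializes it with mov rax,0 / adc rax,rax).
    std::uint64_t baseline_add(std::size_t n, std::uint64_t *c,
                               const std::uint64_t *a, const std::uint64_t *b)
    {
        std::uint64_t carry = 0;
        for (std::size_t i = 0; i < n; ++i) {
            std::uint64_t s = a[i] + b[i];
            std::uint64_t c1 = (s < a[i]);   // carry out of a[i] + b[i]
            std::uint64_t t = s + carry;
            std::uint64_t c2 = (t < s);      // carry out of adding the old carry
            c[i] = t;
            carry = c1 | c2;                 // at most one of the two can be set
        }
        return carry;
    }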
ALIGN 8
Salsa20_OperateKeystream PROC FRAME
@@ -761,286 +708,6 @@ movdqa xmm15, [rsp + 0290h]
add rsp, 10*16 + 32*16 + 8
ret
Salsa20_OperateKeystream ENDP
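
The Salsa20 hunk shows only the epilogue (the xmm15 restore and the 10*16 + 32*16 + 8 byte stack release); the body is unchanged by this commit. For orientation, the routine generates Salsa20 keystream, whose core is the quarter-round below; the SSE2 body evaluates it across the 32-bit lanes of xmm registers. A scalar reference sketch, not the vectorized code:

    #include <cstdint>

    static inline std::uint32_t rotl32(std::uint32_t x, int c)
    {
        return (x << c) | (x >> (32 - c));
    }

    // Salsa20 quarter-round per Bernstein's specification.
    static inline void quarter(std::uint32_t &a, std::uint32_t &b,
                               std::uint32_t &c, std::uint32_t &d)
    {
        b ^= rotl32(a + d,  7);
        c ^= rotl32(b + a,  9);
        d ^= rotl32(c + b, 13);
        a ^= rotl32(d + c, 18);
    }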
-ALIGN 8
-Rijndael_Enc_ProcessAndXorBlock PROC FRAME
-rex_push_reg rbx
-push_reg rsi
-push_reg rdi
-push_reg r12
-push_reg r13
-push_reg r14
-push_reg r15
-.endprolog
-mov r11, rcx ; r11 = lookup table base (first argument)
-mov rdi, [rsp + 5*8 + 7*8] ; inBlock
-mov eax, [r8+0*4]
-xor eax, [rdi+0*4]
-mov r13d, eax
-mov ebx, [r8+1*4]
-xor ebx, [rdi+1*4]
-mov r14d, ebx
-and ebx, eax
-mov eax, [r8+2*4]
-xor eax, [rdi+2*4]
-mov r15d, eax
-and ebx, eax
-mov ecx, [r8+3*4]
-xor ecx, [rdi+3*4]
-and ebx, ecx ; ebx now depends on all four state words...
-and ebx, 0 ; ...then is forced to zero without breaking that dependency
-mov edi, ebx
-label2: ; pre-load the whole 1KB table, one read per cache line
-and ebx, [r11+rdi] ; ebx stays zero; these reads only warm the cache
-add edi, edx ; edx = cache line size argument
-and ebx, [r11+rdi]
-add edi, edx
-and ebx, [r11+rdi]
-add edi, edx
-and ebx, [r11+rdi]
-add edi, edx
-cmp edi, 1024
-jl label2
-and ebx, [r11+1020] ; touch the final entry
-xor r13d, ebx ; XOR the zero into the state so the pre-loads
-xor r14d, ebx ;  cannot be reordered past the lookups below
-xor r15d, ebx
-xor ecx, ebx
-mov edi, [r8+4*4]
-mov eax, [r8+5*4]
-mov ebx, [r8+6*4]
-mov edx, [r8+7*4]
-add r8, 8*4
-movzx esi, cl
-xor edx, [r11+0*1024+4*rsi]
-movzx esi, ch
-xor ebx, [r11+1*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor eax, [r11+2*1024+4*rsi]
-movzx esi, ch
-xor edi, [r11+3*1024+4*rsi]
-mov ecx, r15d
-movzx esi, cl
-xor ebx, [r11+0*1024+4*rsi]
-movzx esi, ch
-xor eax, [r11+1*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor edi, [r11+2*1024+4*rsi]
-movzx esi, ch
-xor edx, [r11+3*1024+4*rsi]
-mov ecx, r14d
-movzx esi, cl
-xor eax, [r11+0*1024+4*rsi]
-movzx esi, ch
-xor edi, [r11+1*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor edx, [r11+2*1024+4*rsi]
-movzx esi, ch
-xor ebx, [r11+3*1024+4*rsi]
-mov ecx, r13d
-movzx esi, cl
-xor edi, [r11+0*1024+4*rsi]
-movzx esi, ch
-xor edx, [r11+1*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor ebx, [r11+2*1024+4*rsi]
-movzx esi, ch
-xor eax, [r11+3*1024+4*rsi]
-mov r15d, ebx
-mov r14d, eax
-mov r13d, edi
-label0: ; main loop: two table-driven rounds per iteration
-mov edi, [r8+0*4]
-mov eax, [r8+1*4]
-mov ebx, [r8+2*4]
-mov ecx, [r8+3*4]
-movzx esi, dl
-xor edi, [r11+3*1024+4*rsi]
-movzx esi, dh
-xor eax, [r11+2*1024+4*rsi]
-shr edx, 16
-movzx esi, dl
-xor ebx, [r11+1*1024+4*rsi]
-movzx esi, dh
-xor ecx, [r11+0*1024+4*rsi]
-mov edx, r15d
-movzx esi, dl
-xor ecx, [r11+3*1024+4*rsi]
-movzx esi, dh
-xor edi, [r11+2*1024+4*rsi]
-shr edx, 16
-movzx esi, dl
-xor eax, [r11+1*1024+4*rsi]
-movzx esi, dh
-xor ebx, [r11+0*1024+4*rsi]
-mov edx, r14d
-movzx esi, dl
-xor ebx, [r11+3*1024+4*rsi]
-movzx esi, dh
-xor ecx, [r11+2*1024+4*rsi]
-shr edx, 16
-movzx esi, dl
-xor edi, [r11+1*1024+4*rsi]
-movzx esi, dh
-xor eax, [r11+0*1024+4*rsi]
-mov edx, r13d
-movzx esi, dl
-xor eax, [r11+3*1024+4*rsi]
-movzx esi, dh
-xor ebx, [r11+2*1024+4*rsi]
-shr edx, 16
-movzx esi, dl
-xor ecx, [r11+1*1024+4*rsi]
-movzx esi, dh
-xor edi, [r11+0*1024+4*rsi]
-mov r15d, ebx
-mov r14d, eax
-mov r13d, edi
-mov edi, [r8+4*4]
-mov eax, [r8+5*4]
-mov ebx, [r8+6*4]
-mov edx, [r8+7*4]
-movzx esi, cl
-xor edi, [r11+3*1024+4*rsi]
-movzx esi, ch
-xor eax, [r11+2*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor ebx, [r11+1*1024+4*rsi]
-movzx esi, ch
-xor edx, [r11+0*1024+4*rsi]
-mov ecx, r15d
-movzx esi, cl
-xor edx, [r11+3*1024+4*rsi]
-movzx esi, ch
-xor edi, [r11+2*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor eax, [r11+1*1024+4*rsi]
-movzx esi, ch
-xor ebx, [r11+0*1024+4*rsi]
-mov ecx, r14d
-movzx esi, cl
-xor ebx, [r11+3*1024+4*rsi]
-movzx esi, ch
-xor edx, [r11+2*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor edi, [r11+1*1024+4*rsi]
-movzx esi, ch
-xor eax, [r11+0*1024+4*rsi]
-mov ecx, r13d
-movzx esi, cl
-xor eax, [r11+3*1024+4*rsi]
-movzx esi, ch
-xor ebx, [r11+2*1024+4*rsi]
-shr ecx, 16
-movzx esi, cl
-xor edx, [r11+1*1024+4*rsi]
-movzx esi, ch
-xor edi, [r11+0*1024+4*rsi]
-mov r15d, ebx
-mov r14d, eax
-mov r13d, edi
-add r8, 8*4
-cmp r9, r8
-jne label0
-mov eax, [r9+0*4] ; r9 = end of key schedule; load the final round key
-mov ecx, [r9+1*4]
-mov esi, [r9+2*4]
-mov edi, [r9+3*4]
-movzx ebx, dl
-movzx ebx, BYTE PTR [r11+1+4*rbx] ; final round: plain S-box byte stored at offset 1 of each table entry
-shl ebx, 3*8
-xor eax, ebx
-movzx ebx, dh
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 2*8
-xor ecx, ebx
-shr edx, 16
-movzx ebx, dl
-shr edx, 8
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 1*8
-xor esi, ebx
-movzx ebx, BYTE PTR [r11+1+4*rdx]
-xor edi, ebx
-mov edx, r15d
-movzx ebx, dl
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 3*8
-xor edi, ebx
-movzx ebx, dh
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 2*8
-xor eax, ebx
-shr edx, 16
-movzx ebx, dl
-shr edx, 8
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 1*8
-xor ecx, ebx
-movzx ebx, BYTE PTR [r11+1+4*rdx]
-xor esi, ebx
-mov edx, r14d
-movzx ebx, dl
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 3*8
-xor esi, ebx
-movzx ebx, dh
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 2*8
-xor edi, ebx
-shr edx, 16
-movzx ebx, dl
-shr edx, 8
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 1*8
-xor eax, ebx
-movzx ebx, BYTE PTR [r11+1+4*rdx]
-xor ecx, ebx
-mov edx, r13d
-movzx ebx, dl
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 3*8
-xor ecx, ebx
-movzx ebx, dh
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 2*8
-xor esi, ebx
-shr edx, 16
-movzx ebx, dl
-shr edx, 8
-movzx ebx, BYTE PTR [r11+1+4*rbx]
-shl ebx, 1*8
-xor edi, ebx
-movzx ebx, BYTE PTR [r11+1+4*rdx]
-xor eax, ebx
-mov rbx, [rsp + 6*8 + 7*8] ; xorBlock
-test rbx, rbx
-jz label1
-xor eax, [rbx+0*4]
-xor ecx, [rbx+1*4]
-xor esi, [rbx+2*4]
-xor edi, [rbx+3*4]
-label1:
-mov rbx, [rsp + 7*8 + 7*8] ; outBlock
-mov [rbx+0*4], eax
-mov [rbx+1*4], ecx
-mov [rbx+2*4], esi
-mov [rbx+3*4], edi
-pop r15
-pop r14
-pop r13
-pop r12
-pop rdi
-pop rsi
-pop rbx
-ret
-Rijndael_Enc_ProcessAndXorBlock ENDP
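
The deleted Rijndael_Enc_ProcessAndXorBlock is a classic table-driven AES encryptor: r11 points at four consecutive 1 KB tables of 32-bit entries, each round-key word is XORed with four table lookups indexed by single state bytes (the movzx cl/ch and shr ecx,16 sequences), and the final round instead pulls plain S-box bytes from offset 1 of each 4-byte entry. A hedged sketch of one round column follows; the table names and the exact byte-to-table pairing are illustrative, since the asm alternates two pairings to match Crypto++'s rotated table layout:

    #include <cstdint>

    extern const std::uint32_t Te[4][256];  // stand-in for the 4 KB block at r11

    // One output word of a table-driven AES round: the round key XORed with
    // one lookup from each table, indexed by successive bytes of the state.
    static inline std::uint32_t round_column(std::uint32_t rk,
                                             std::uint32_t s0, std::uint32_t s1,
                                             std::uint32_t s2, std::uint32_t s3)
    {
        return rk ^ Te[0][ s0        & 0xff]
                  ^ Te[1][(s1 >>  8) & 0xff]
                  ^ Te[2][(s2 >> 16) & 0xff]
                  ^ Te[3][(s3 >> 24) & 0xff];
    }

The label2 loop near the top of the routine reads one word per cache line across the whole table before the first lookup, ANDing the results into a register that is already zero: the table is forced into cache on a data path the CPU cannot skip, which blunts cache-timing attacks against the data-dependent lookups that follow.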
ALIGN 8
Sosemanuk_OperateKeystream PROC FRAME