-rw-r--r--  x86/arcfour-crypt.asm | 63
1 file changed, 42 insertions(+), 21 deletions(-)
diff --git a/x86/arcfour-crypt.asm b/x86/arcfour-crypt.asm
index 8bf04539..bdeb98f0 100644
--- a/x86/arcfour-crypt.asm
+++ b/x86/arcfour-crypt.asm
@@ -38,48 +38,69 @@ C Input arguments:
C src = 32(%esp)
C Register usage:
C %ebp = ctx
- C  %esi = src (updated throughout loop)
- C  %edi = dst (updated throughout loop)
- C %edx = src + length (end of source area)
+ C %esi = src
+ C %edi = dst
+ C %edx = loop counter
C %eax = i
C %ebx = j
C %cl = si
C %ch = sj
movl 24(%esp), %edx C length
- testl %edx,%edx
- jz .Lend
-
movl 20(%esp), %ebp C ctx
- movl 28(%esp), %edi
- movl 32(%esp), %esi
- addl %esi, %edx C Keep src + length
+ movl 28(%esp), %edi C dst
+ movl 32(%esp), %esi C src
+
+ lea (%edx, %edi), %edi
+ lea (%edx, %esi), %esi
+ negl %edx
+ jnc .Lend
movzbl ARCFOUR_I (%ebp), %eax C i
movzbl ARCFOUR_J (%ebp), %ebx C j
+
+ incb %al
+ sarl $1, %edx
+ jc .Lloop_odd
+
+ ALIGN(4)
.Lloop:
-C incb %al
+ movb (%ebp, %eax), %cl C si.
+ addb %cl, %bl
+ movb (%ebp, %ebx), %ch C sj
+ movb %ch, (%ebp, %eax) C S[i] = sj
incl %eax
- andl $0xff, %eax
- movzbl (%ebp, %eax), %ecx C si. Clears high bytes
+ movzbl %al, %eax
+ movb %cl, (%ebp, %ebx) C S[j] = si
+ addb %ch, %cl
+ movzbl %cl, %ecx C Clear, so it can be used
+ C for indexing.
+ movb (%ebp, %ecx), %cl
+ xorb (%esi, %edx, 2), %cl
+ movb %cl, (%edi, %edx, 2)
+
+ C FIXME: Could exchange cl and ch in the second half
+ C and try to interleave instructions better.
+.Lloop_odd:
+ movb (%ebp, %eax), %cl C si.
addb %cl, %bl
-C The addl/andl pair is preferable on PPro and PII, but slows things down on AMD Duron.
-C addl %ecx, %ebx
-C andl $0xff, %ebx
movb (%ebp, %ebx), %ch C sj
movb %ch, (%ebp, %eax) C S[i] = sj
+ incl %eax
+ movzbl %al, %eax
movb %cl, (%ebp, %ebx) C S[j] = si
addb %ch, %cl
movzbl %cl, %ecx C Clear, so it can be used
C for indexing.
movb (%ebp, %ecx), %cl
- xorb (%esi), %cl
- incl %esi
- movb %cl, (%edi)
- incl %edi
- cmpl %esi, %edx
- jne .Lloop
+ xorb 1(%esi, %edx, 2), %cl
+ incl %edx
+ movb %cl, -1(%edi, %edx, 2)
+
+ jnz .Lloop
+C .Lloop_done:
+ decb %al
movb %al, ARCFOUR_I (%ebp) C Store the new i and j.
movb %bl, ARCFOUR_J (%ebp)
.Lend:
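
For reference, the per-byte ARCFOUR update that both the old and the new loop bodies implement corresponds roughly to the C sketch below. This is a sketch only: the context layout (an S[256] table followed by the i and j bytes) is assumed from the ARCFOUR_I/ARCFOUR_J offsets used above, and the function name is illustrative. In the assembly, %eax, %ebx, %cl and %ch hold i, j, si and sj.

#include <stdint.h>
#include <stddef.h>

struct arcfour_ctx { uint8_t S[256]; uint8_t i; uint8_t j; };   /* assumed layout */

static void
arcfour_crypt_sketch(struct arcfour_ctx *ctx, size_t length,
                     uint8_t *dst, const uint8_t *src)
{
  uint8_t i = ctx->i, j = ctx->j;
  size_t k;

  for (k = 0; k < length; k++)
    {
      uint8_t si, sj;
      i++;                      /* incb %al */
      si = ctx->S[i];           /* movb (%ebp, %eax), %cl */
      j += si;                  /* addb %cl, %bl */
      sj = ctx->S[j];           /* movb (%ebp, %ebx), %ch */
      ctx->S[i] = sj;           /* S[i] = sj */
      ctx->S[j] = si;           /* S[j] = si */
      dst[k] = src[k] ^ ctx->S[(uint8_t) (si + sj)];
    }
  ctx->i = i;
  ctx->j = j;
}

Note that the new code keeps i one step ahead of this sketch: it is incremented once before entering the loop (incb %al), incremented inside each half-iteration after S[i] has been loaded, and corrected with decb %al before the indices are stored back.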
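
The main change in this patch is the loop control: rather than advancing src/dst and comparing against src + length, the new code points %esi and %edi at the end of the buffers, counts a negative index in %edx up to zero, and unrolls two bytes per round, entering at .Lloop_odd when the length is odd. The negl/jnc pair handles length == 0 (neg clears carry only for a zero operand), and sarl $1 shifts the low bit of the negated length into carry for the odd/even test. A rough C equivalent of that structure, with the cipher work reduced to a placeholder XOR (the function name and the 0x55 constant are stand-ins, not part of the source):

#include <stdint.h>
#include <stddef.h>

static void
crypt_loop_shape(uint8_t *dst, const uint8_t *src, size_t length)
{
  ptrdiff_t n;

  if (length == 0)                      /* negl %edx; jnc .Lend */
    return;
  dst += length;                        /* lea (%edx, %edi), %edi */
  src += length;                        /* lea (%edx, %esi), %esi */
  n = -(ptrdiff_t) ((length + 1) / 2);  /* rounds of the unrolled loop */

  if (length & 1)                       /* sarl $1, %edx; jc .Lloop_odd */
    goto odd;
  do
    {
      dst[2*n] = src[2*n] ^ 0x55;           /* .Lloop: first byte of the pair */
    odd:
      dst[2*n + 1] = src[2*n + 1] ^ 0x55;   /* .Lloop_odd: second byte */
      n++;                                  /* incl %edx */
    }
  while (n != 0);                           /* jnz .Lloop */
}

Counting toward zero lets the loop-end test fall out of the flags set by incl %edx, so the cmpl of the old inner loop disappears.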