author    Niels Möller <nisse@lysator.liu.se>    2008-09-13 21:02:33 +0200
committer Niels Möller <nisse@lysator.liu.se>    2008-09-13 21:02:33 +0200
commit    832eebcc227137c768243a50abae8a325a39135b (patch)
tree      db9f933648329622511f64ed203d0aa6f4422d81 /x86
parent    d7b85b91c61c01ed4dbab9c7d6029365ab36a795 (diff)
download  nettle-832eebcc227137c768243a50abae8a325a39135b.tar.gz
Further micro optimizations.
Rev: nettle/x86/aes.m4:1.4
Diffstat (limited to 'x86')
-rw-r--r--    x86/aes.m4    60
1 file changed, 25 insertions, 35 deletions
diff --git a/x86/aes.m4 b/x86/aes.m4
index 98f4e816..5059a3ef 100644
--- a/x86/aes.m4
+++ b/x86/aes.m4
@@ -1,23 +1,16 @@
-dnl BYTEREG(reg) gives the 8-bit register corresponding to the given 32-bit register.
+dnl LREG(reg) gives the 8-bit register corresponding to the given 32-bit register.
dnl Use in AES_SUBST_BYTE below, and is used by both the x86 and the x86_64 assembler.
-define(<BYTEREG>,<ifelse(
+define(<LREG>,<ifelse(
$1, %eax, %al,
$1, %ebx, %bl,
$1, %ecx, %cl,
- $1, %edx, %dl,
- dnl The rest are x86_64 only
- $1, %esi, %sil,
- $1, %edi, %dil,
- $1, %ebp, %bpl,
- $1, %esp, %spl,
- $1, %r8d, %r8b,
- $1, %r9d, %r9b,
- $1, %r10d, %r10b,
- $1, %r11d, %r11b,
- $1, %r12d, %r12b,
- $1, %r13d, %r13b,
- $1, %r14d, %r14b,
- $1, %r15d, %r15b)>)dnl
+ $1, %edx, %dl)>)dnl
+
+define(<HREG>,<ifelse(
+ $1, %eax, %ah,
+ $1, %ebx, %bh,
+ $1, %ecx, %ch,
+ $1, %edx, %dh)>)dnl
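
As a rough C sketch (not part of the patch): the point of HREG is that the second byte of %eax, %ebx, %ecx and %edx is directly addressable as %ah, %bh, %ch or %dh, so extracting it needs a single movzbl instead of a move, shift and mask.

#include <stdint.h>

/* Illustrative only: the value a movzbl HREG(reg), dst loads.
   The old code computed it with movl; shrl $8; andl $0xff. */
static uint32_t second_byte(uint32_t w)
{
    return (w >> 8) & 0xff;
}
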
dnl AES_LOAD(a, b, c, d, src, key)
dnl Loads the next block of data from src, and add the subkey pointed
@@ -51,20 +44,18 @@ define(<AES_STORE>, <
movl $3,8($6)
movl $4,12($6)>)dnl
-dnl AES_ROUND(table,a,b,c,d,out,tmp)
+dnl AES_ROUND(table,a,b,c,d,out,ptr)
dnl Computes one word of the AES round. Leaves result in $6.
define(<AES_ROUND>, <
- movzbl BYTEREG($2), $7
- movl AES_TABLE0 ($1, $7,4),$6
- movl $3, $7
- shrl <$>8,$7
- andl <$>0xff,$7
+ movzbl LREG($2), $7
+ movl AES_TABLE0 ($1, $7, 4),$6
+ movzbl HREG($3), $7
xorl AES_TABLE1 ($1, $7, 4),$6
- movl $4,$7 C third one
+ movl $4,$7
shrl <$>16,$7
andl <$>0xff,$7
xorl AES_TABLE2 ($1, $7, 4),$6
- movl $5,$7 C fourth one
+ movl $5,$7
shrl <$>24,$7
xorl AES_TABLE3 ($1, $7, 4),$6>)dnl
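
For orientation, a hedged C sketch of the word AES_ROUND leaves in $6; T0..T3 here are placeholder names for the tables addressed through AES_TABLE0..AES_TABLE3, not nettle's actual symbols.

#include <stdint.h>

/* Sketch only: one inner-round output word, one table lookup per
   input word.  The (b >> 8) & 0xff extraction is the one the patch
   replaces with a single movzbl HREG($3). */
static uint32_t aes_round_word(const uint32_t T0[256], const uint32_t T1[256],
                               const uint32_t T2[256], const uint32_t T3[256],
                               uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return T0[a & 0xff]
         ^ T1[(b >> 8) & 0xff]
         ^ T2[(c >> 16) & 0xff]
         ^ T3[d >> 24];
}
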
@@ -74,8 +65,7 @@ dnl Note that we have to quote $ in constants.
define(<AES_FINAL_ROUND>, <
C FIXME: Perform substitution on least significant byte here,
C to save work later.
- movl $1,$5
- andl <$>0x000000ff,$5
+ movzbl LREG($1),$5
movl $2,$6
andl <$>0x0000ff00,$6
orl $6, $5
@@ -86,24 +76,24 @@ define(<AES_FINAL_ROUND>, <
andl <$>0xff000000,$6
orl $6, $5>)dnl
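
Similarly, a hedged sketch of the byte gathering AES_FINAL_ROUND performs, assuming the part of the macro elided between the two hunks follows the same pattern for the third word; the S-box substitution itself is deferred, as the FIXME notes.

#include <stdint.h>

/* Sketch only: collect one byte from each word into the output word.
   The low byte now comes from one movzbl LREG($1) instead of movl + andl. */
static uint32_t aes_final_gather(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return (a & 0x000000ff)
         | (b & 0x0000ff00)
         | (c & 0x00ff0000)
         | (d & 0xff000000);
}
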
-dnl AES_SUBST_BYTE(A, B, C, D, table, tmp)
+dnl AES_SUBST_BYTE(A, B, C, D, table, ptr)
dnl Substitutes the least significant byte of
dnl each of eax, ebx, ecx and edx, and also rotates
dnl the words one byte to the left.
dnl Uses that AES_SBOX == 0
define(<AES_SUBST_BYTE>, <
- movzbl BYTEREG($1),$6
- movb ($5, $6),BYTEREG($1)
+ movzbl LREG($1),$6
+ movb ($5, $6),LREG($1)
roll <$>8,$1
- movzbl BYTEREG($2),$6
- movb ($5, $6),BYTEREG($2)
+ movzbl LREG($2),$6
+ movb ($5, $6),LREG($2)
roll <$>8,$2
- movzbl BYTEREG($3),$6
- movb ($5, $6),BYTEREG($3)
+ movzbl LREG($3),$6
+ movb ($5, $6),LREG($3)
roll <$>8,$3
- movzbl BYTEREG($4),$6
- movb ($5, $6),BYTEREG($4)
+ movzbl LREG($4),$6
+ movb ($5, $6),LREG($4)
roll <$>8,$4>)dnl
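
Finally, a hedged C sketch of what AES_SUBST_BYTE does to each of the four words, assuming $5 points at the 256-byte S-box (the macro relies on AES_SBOX == 0):

#include <stdint.h>

/* Sketch only: substitute the least significant byte through the S-box,
   then rotate the word left by one byte; applied to each of a, b, c, d. */
static uint32_t subst_and_rotate(const uint8_t sbox[256], uint32_t w)
{
    w = (w & 0xffffff00) | sbox[w & 0xff];
    return (w << 8) | (w >> 24);
}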