diff options
author | Niels Möller <nisse@lysator.liu.se> | 2008-09-13 21:02:33 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2008-09-13 21:02:33 +0200 |
commit | 832eebcc227137c768243a50abae8a325a39135b (patch) | |
tree | db9f933648329622511f64ed203d0aa6f4422d81 /x86 | |
parent | d7b85b91c61c01ed4dbab9c7d6029365ab36a795 (diff) | |
download | nettle-832eebcc227137c768243a50abae8a325a39135b.tar.gz |
Further micro optimizations.
Rev: nettle/x86/aes.m4:1.4
Diffstat (limited to 'x86')
-rw-r--r-- | x86/aes.m4 | 60 |
1 files changed, 25 insertions, 35 deletions
@@ -1,23 +1,16 @@
-dnl BYTEREG(reg) gives the 8-bit register corresponding to the given 32-bit register.
+dnl LREG(reg) gives the 8-bit register corresponding to the given 32-bit register.
 dnl Use in AES_SUBST_BYTE below, and is used by both the x86 and the x86_64 assembler.
-define(<BYTEREG>,<ifelse(
+define(<LREG>,<ifelse(
 	$1, %eax, %al,
 	$1, %ebx, %bl,
 	$1, %ecx, %cl,
-	$1, %edx, %dl,
-	dnl The rest are x86_64 only
-	$1, %esi, %sil,
-	$1, %edi, %dil,
-	$1, %ebp, %bpl,
-	$1, %esp, %spl,
-	$1, %r8d, %r8b,
-	$1, %r9d, %r9b,
-	$1, %r10d, %r10b,
-	$1, %r11d, %r11b,
-	$1, %r12d, %r12b,
-	$1, %r13d, %r13b,
-	$1, %r14d, %r14b,
-	$1, %r15d, %r15b)>)dnl
+	$1, %edx, %dl)>)dnl
+
+define(<HREG>,<ifelse(
+	$1, %eax, %ah,
+	$1, %ebx, %bh,
+	$1, %ecx, %ch,
+	$1, %edx, %dh)>)dnl
 
 dnl AES_LOAD(a, b, c, d, src, key)
 dnl Loads the next block of data from src, and add the subkey pointed
@@ -51,20 +44,18 @@ define(<AES_STORE>, <
 	movl $3,8($6)
 	movl $4,12($6)>)dnl
 
-dnl AES_ROUND(table,a,b,c,d,out,tmp)
+dnl AES_ROUND(table,a,b,c,d,out,ptr)
 dnl Computes one word of the AES round. Leaves result in $6.
 define(<AES_ROUND>, <
-	movzbl BYTEREG($2), $7
-	movl AES_TABLE0 ($1, $7,4),$6
-	movl $3, $7
-	shrl <$>8,$7
-	andl <$>0xff,$7
+	movzbl LREG($2), $7
+	movl AES_TABLE0 ($1, $7, 4),$6
+	movzbl HREG($3), $7
 	xorl AES_TABLE1 ($1, $7, 4),$6
-	movl $4,$7 C third one
+	movl $4,$7
 	shrl <$>16,$7
 	andl <$>0xff,$7
 	xorl AES_TABLE2 ($1, $7, 4),$6
-	movl $5,$7 C fourth one
+	movl $5,$7
 	shrl <$>24,$7
 	xorl AES_TABLE3 ($1, $7, 4),$6>)dnl
 
@@ -74,8 +65,7 @@ dnl Note that we have to quote $ in constants.
 define(<AES_FINAL_ROUND>, <
 	C FIXME: Perform substitution on least significant byte here,
 	C to save work later.
-	movl $1,$5
-	andl <$>0x000000ff,$5
+	movzbl LREG($1),$5
 	movl $2,$6
 	andl <$>0x0000ff00,$6
 	orl $6, $5
@@ -86,24 +76,24 @@ define(<AES_FINAL_ROUND>, <
 	andl <$>0xff000000,$6
 	orl $6, $5>)dnl
 
-dnl AES_SUBST_BYTE(A, B, C, D, table, tmp)
+dnl AES_SUBST_BYTE(A, B, C, D, table, ptr)
 dnl Substitutes the least significant byte of
 dnl each of eax, ebx, ecx and edx, and also rotates
 dnl the words one byte to the left.
 dnl Uses that AES_SBOX == 0
 define(<AES_SUBST_BYTE>, <
-	movzbl BYTEREG($1),$6
-	movb ($5, $6),BYTEREG($1)
+	movzbl LREG($1),$6
+	movb ($5, $6),LREG($1)
 	roll <$>8,$1
 
-	movzbl BYTEREG($2),$6
-	movb ($5, $6),BYTEREG($2)
+	movzbl LREG($2),$6
+	movb ($5, $6),LREG($2)
 	roll <$>8,$2
 
-	movzbl BYTEREG($3),$6
-	movb ($5, $6),BYTEREG($3)
+	movzbl LREG($3),$6
+	movb ($5, $6),LREG($3)
 	roll <$>8,$3
 
-	movzbl BYTEREG($4),$6
-	movb ($5, $6),BYTEREG($4)
+	movzbl LREG($4),$6
+	movb ($5, $6),LREG($4)
 	roll <$>8,$4>)dnl