summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2002-02-25 17:00:37 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2002-02-25 17:00:37 +0100
commit: 7a70cc7a7ee328fc8bcf4cca238d3ec422cb2517 (patch)
tree: 9b6ddbda4a1c22bdd67e8e547e1a7620281013ef
parent: a06b715eccea6dcf6d6c175986c1c25c9d66438a (diff)
download: nettle-7a70cc7a7ee328fc8bcf4cca238d3ec422cb2517.tar.gz
(_aes_crypt): Some peephole optimizations: duplicating some
instructions to fill nops, and putting branch instructions on even word addresses. Rev: src/nettle/sparc/aes.asm:1.66
-rw-r--r--sparc/aes.asm31
1 files changed, 17 insertions, 14 deletions
diff --git a/sparc/aes.asm b/sparc/aes.asm
index e49a195d..c2420ba7 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -69,12 +69,12 @@ _aes_crypt:
add T, AES_SIDX3, IDX3
! Read src, and add initial subkey
! Difference between ctx and src.
- ! NOTE: This instruction is duplicated in the delay slot
+ ! NOTE: These instructions are duplicated in the delay slot,
+ ! and in the instruction before the branch
sub ctx, src, %g2
-
-.Lblock_loop:
! Difference between wtxt and src
sub wtxt, src, %g3
+.Lblock_loop:
! For stop condition. Note that src is incremented in the
! delay slot
add src, 8, %g4
@@ -105,6 +105,7 @@ _aes_crypt:
add ctx, 16, key
.Lround_loop:
! 4*i
+ ! NOTE: Instruction duplicated in delay slot
mov 0, i
.Linner_loop:
! The comments mark which j in T->table[j][ Bj(wtxt[IDXi(i)]) ]
@@ -151,21 +152,23 @@ _aes_crypt:
add i, 4, i
! switch roles for tmp and wtxt
xor wtxt, diff, wtxt
+ xor tmp, diff, tmp
+
subcc round, 1, round
-
add key, 16, key
- nop
- bne .Lround_loop
- xor tmp, diff, tmp
+ bne .Linner_loop
+ mov 0, i
! final round
- ! 4*i
- mov 0, i
+ ! Use round as the loop variable, as it's already zero
+undefine(<i>)
+define(i, round)
-.Lfinal_loop:
! Comments mark which j in T->sbox[Bj(wtxt[IDXj(i)])]
! the instruction is part of
+ ! NOTE: First instruction duplicated in delay slot
ld [IDX1+i], t1 ! 1
+.Lfinal_loop:
! IDX2(j) = j XOR 2
xor i, 8, t2
! ld [idx-16], t2 ! 2
@@ -205,15 +208,15 @@ _aes_crypt:
stb t3, [dst+3]
stb t2, [dst+2]
stb t0, [dst]
+ add dst, 4, dst
bleu .Lfinal_loop
- add dst, 4, dst
-
+ ld [IDX1+i], t1 ! 1
addcc length, -16, length
- nop
+ sub ctx, src, %g2
bne .Lblock_loop
- sub ctx, src, %g2
+ sub wtxt, src, %g3
.Lend:
ret