From 7a70cc7a7ee328fc8bcf4cca238d3ec422cb2517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= Date: Mon, 25 Feb 2002 17:00:37 +0100 Subject: (_aes_crypt): Some peep hole optimizations, duplicating some instructions to fill nop:s, and put branch instructions on even word addresses. Rev: src/nettle/sparc/aes.asm:1.66 --- sparc/aes.asm | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/sparc/aes.asm b/sparc/aes.asm index e49a195d..c2420ba7 100644 --- a/sparc/aes.asm +++ b/sparc/aes.asm @@ -69,12 +69,12 @@ _aes_crypt: add T, AES_SIDX3, IDX3 ! Read src, and add initial subkey ! Difference between ctx and src. - ! NOTE: This instruction is duplicated in the delay slot + ! NOTE: These instructions are duplicated in the delay slot, + ! and in the instruction before the branch sub ctx, src, %g2 - -.Lblock_loop: ! Difference between wtxt and src sub wtxt, src, %g3 +.Lblock_loop: ! For stop condition. Note that src is incremented in the ! delay slot add src, 8, %g4 @@ -105,6 +105,7 @@ _aes_crypt: add ctx, 16, key .Lround_loop: ! 4*i + ! NOTE: Instruction duplicated in delay slot mov 0, i .Linner_loop: ! The comments mark which j in T->table[j][ Bj(wtxt[IDXi(i)]) ] @@ -151,21 +152,23 @@ _aes_crypt: add i, 4, i ! switch roles for tmp and wtxt xor wtxt, diff, wtxt + xor tmp, diff, tmp + subcc round, 1, round - add key, 16, key - nop - bne .Lround_loop - xor tmp, diff, tmp + bne .Linner_loop + mov 0, i ! final round - ! 4*i - mov 0, i + ! Use round as the loop variable, as it's already zero +undefine() +define(i, round) -.Lfinal_loop: ! Comments mark which j in T->sbox[Bj(wtxt[IDXj(i)])] ! the instruction is part of + ! NOTE: First instruction duplicated in delay slot ld [IDX1+i], t1 ! 1 +.Lfinal_loop: ! IDX2(j) = j XOR 2 xor i, 8, t2 ! ld [idx-16], t2 ! 2 @@ -205,15 +208,15 @@ _aes_crypt: stb t3, [dst+3] stb t2, [dst+2] stb t0, [dst] + add dst, 4, dst bleu .Lfinal_loop - add dst, 4, dst - + ld [IDX1+i], t1 ! 
1 addcc length, -16, length - nop + sub ctx, src, %g2 bne .Lblock_loop - sub ctx, src, %g2 + sub wtxt, src, %g3 .Lend: ret -- cgit v1.2.1