diff options
author | Niels Möller <nisse@lysator.liu.se> | 2013-05-16 11:09:30 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2013-05-16 11:09:30 +0200 |
commit | 23cd4cf7aea5b4c13135dd38b6e88423a045836a (patch) | |
tree | 751f4bf72dc17e347dcdb7ddb93253129a73ee03 /arm | |
parent | 8136e192d9da6d501a9d19b81e63c15b8c9729b9 (diff) | |
download | nettle-23cd4cf7aea5b4c13135dd38b6e88423a045836a.tar.gz |
arm/v6: AES micro-optimization.
Diffstat (limited to 'arm')
-rw-r--r-- | arm/aes.m4 | 5 | ||||
-rw-r--r-- | arm/v6/aes-decrypt-internal.asm | 6 | ||||
-rw-r--r-- | arm/v6/aes-encrypt-internal.asm | 2 |
3 files changed, 4 insertions, 9 deletions
@@ -33,9 +33,8 @@ define(<AES_FINAL_ROUND>, <
 	uxtb	T0, $3, ror #16
 	ldrb	T0, [TABLE, T0]
 	eor	$6, $6, T0, lsl #16
-	uxtb	T0, $4, ror #24
-	ldrb	T0, [TABLE, T0]
+	ldrb	T0, [TABLE, $4, lsr #24]
 	eor	$6, $6, T0, lsl #24
 	ldr	T0, [$5], #+4
-	eor	$6, T0
+	eor	$6, $6, T0
 >)
diff --git a/arm/v6/aes-decrypt-internal.asm b/arm/v6/aes-decrypt-internal.asm
index 0c8cfc5e..e9b6e570 100644
--- a/arm/v6/aes-decrypt-internal.asm
+++ b/arm/v6/aes-decrypt-internal.asm
@@ -19,10 +19,6 @@ C MA 02111-1301, USA.
 
 include_src(<arm/aes.m4>)
 
-C	Benchmarked at at 785, 914, 1051 cycles/block on cortex A9,
-C	for 128, 192 and 256 bit key sizes. Unclear why it is slower
-C	than _aes_encrypt.
-
 define(<CTX>, <r0>)
 define(<TABLE>, <r1>)
 define(<LENGTH>, <r2>)
@@ -119,7 +115,7 @@ PROLOGUE(_nettle_aes_decrypt)
 	push	{r4,r5,r6,r7,r8,r10,r11,lr}
 	nop	C For some mysterious reason, taking out this nop
-		C slows this function down on Cortex-A9.
+		C slows this function down by 10(!) % on Cortex-A9.
 	ALIGN(16)
 .Lblock_loop:
 	mov	KEY, CTX
diff --git a/arm/v6/aes-encrypt-internal.asm b/arm/v6/aes-encrypt-internal.asm
index 69556a35..6887b899 100644
--- a/arm/v6/aes-encrypt-internal.asm
+++ b/arm/v6/aes-encrypt-internal.asm
@@ -19,7 +19,7 @@ C MA 02111-1301, USA.
 
 include_src(<arm/aes.m4>)
 
-C	Benchmarked at at 693, 824, 950 cycles/block on cortex A9,
+C	Benchmarked at at 680, 818, 929 cycles/block on cortex A9,
 C	for 128, 192 and 256 bit key sizes.
 
 C	Possible improvements: More efficient load and store with