summary | refs | log | tree | commit | diff
path: root/armv7
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2013-04-12 13:19:41 +0200
committer Niels Möller <nisse@lysator.liu.se> 2013-04-12 13:19:41 +0200
commit cdde35bb2530c4a29cb72dea9d5207a9c954c80f (patch)
tree 464ba9ff41efc509ce193d151b067a40a08d9a7c /armv7
parent 3be646d1cdbffbceef956de554a005320a1aa63d (diff)
download nettle-cdde35bb2530c4a29cb72dea9d5207a9c954c80f.tar.gz
ARM umac_nh: Use vmlal, 16% speedup.
Diffstat (limited to 'armv7')
-rw-r--r-- armv7/umac-nh.asm 14
1 files changed, 6 insertions, 8 deletions
diff --git a/armv7/umac-nh.asm b/armv7/umac-nh.asm
index 7c0a0290..87cb86d0 100644
--- a/armv7/umac-nh.asm
+++ b/armv7/umac-nh.asm
@@ -30,7 +30,7 @@ define(<QB>, <q1>)
define(<DM>, <d16>)
define(<QLEFT>, <q9>)
define(<QRIGHT>, <q10>)
-define(<QACC>, <q11>)
+define(<QY>, <q11>)
define(<QT0>, <q12>)
define(<QT1>, <q13>)
define(<QK0>, <q14>)
@@ -59,7 +59,7 @@ PROLOGUE(_nettle_umac_nh)
vmov.i32 D0REG(QLEFT)[0], SHIFT
vmov.32 D1REG(QLEFT), D0REG(QLEFT)
- vmov.i64 QACC, #0
+ vmov.i64 QY, #0
vshl.u64 DM, DM, D0REG(QRIGHT)
.Loop:
@@ -78,14 +78,12 @@ PROLOGUE(_nettle_umac_nh)
vld1.i32 {QK0, QK1}, [KEY]!
vadd.i32 QA, QA, QK0
vadd.i32 QB, QB, QK1
- vmull.u32 QT0, D0REG(QA), D0REG(QB)
- vmull.u32 QT1, D1REG(QA), D1REG(QB)
subs LENGTH, LENGTH, #32
- vadd.i64 QACC, QACC, QT0
- vadd.i64 QACC, QACC, QT1
+ vmlal.u32 QY, D0REG(QA), D0REG(QB)
+ vmlal.u32 QY, D1REG(QA), D1REG(QB)
bhi .Loop
- vadd.i64 D0REG(QACC), D0REG(QACC), D1REG(QACC)
- vmov r0, r1, D0REG(QACC)
+ vadd.i64 D0REG(QY), D0REG(QY), D1REG(QY)
+ vmov r0, r1, D0REG(QY)
bx lr
EPILOGUE(_nettle_umac_nh)