summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>2019-11-07 15:10:38 +0000
committerMakoto Kato <m_kato@ga2.so-net.ne.jp>2019-11-07 15:10:38 +0000
commitaa8a7576b93c41f79c47a85270434cb628caecb5 (patch)
tree2f32f54a10e584adaf27b0f2437c72564530f958
parent25b1e58fd386f2b55ea345e73e83ea89b29294e4 (diff)
downloadnss-hg-aa8a7576b93c41f79c47a85270434cb628caecb5.tar.gz
Bug 1592869 - Use NEON for ctr_xor. r=kjacobs
Using NEON for ctr_xor improves aes_ctr decode/encode time by 30%-40% on Cortex-A72. Differential Revision: https://phabricator.services.mozilla.com/D51380
-rw-r--r--lib/freebl/ctr.c13
1 files changed, 13 insertions, 0 deletions
diff --git a/lib/freebl/ctr.c b/lib/freebl/ctr.c
index 4d26a5b06..67ee72c0c 100644
--- a/lib/freebl/ctr.c
+++ b/lib/freebl/ctr.c
@@ -17,6 +17,10 @@
#include "rijndael.h"
#endif
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+
SECStatus
CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher,
const unsigned char *param)
@@ -114,6 +118,15 @@ ctr_xor(unsigned char *target, const unsigned char *x,
const unsigned char *y, unsigned int count)
{
unsigned int i;
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+ while (count >= 16) {
+ vst1q_u8(target, veorq_u8(vld1q_u8(x), vld1q_u8(y)));
+ target += 16;
+ x += 16;
+ y += 16;
+ count -= 16;
+ }
+#endif
for (i = 0; i < count; i++) {
*target++ = *x++ ^ *y++;
}