summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMamone Tarsha <maamoun.tk@googlemail.com>2021-07-30 14:45:28 +0300
committerMamone Tarsha <maamoun.tk@googlemail.com>2021-07-30 14:45:28 +0300
commit39d1e2a36bd50bd63e9a81e54e6dadd4c4ef515e (patch)
tree8b64f1721a33aa9e0fb3c57024b663158ae1fd50
parent2c59a6434f3c4d594f06a7875d503726694de381 (diff)
downloadnettle-39d1e2a36bd50bd63e9a81e54e6dadd4c4ef515e.tar.gz
[AArch64] Optimize AES with fat build support
-rw-r--r--arm64/crypto/aes128-decrypt.asm163
-rw-r--r--arm64/crypto/aes128-encrypt.asm163
-rw-r--r--arm64/crypto/aes192-decrypt.asm171
-rw-r--r--arm64/crypto/aes192-encrypt.asm171
-rw-r--r--arm64/crypto/aes256-decrypt.asm177
-rw-r--r--arm64/crypto/aes256-encrypt.asm177
-rw-r--r--arm64/fat/aes128-decrypt-2.asm36
-rw-r--r--arm64/fat/aes128-encrypt-2.asm36
-rw-r--r--arm64/fat/aes192-decrypt-2.asm36
-rw-r--r--arm64/fat/aes192-encrypt-2.asm36
-rw-r--r--arm64/fat/aes256-decrypt-2.asm36
-rw-r--r--arm64/fat/aes256-encrypt-2.asm36
-rw-r--r--configure.ac2
-rw-r--r--fat-arm64.c84
14 files changed, 1321 insertions, 3 deletions
diff --git a/arm64/crypto/aes128-decrypt.asm b/arm64/crypto/aes128-decrypt.asm
new file mode 100644
index 00000000..f459bb15
--- /dev/null
+++ b/arm64/crypto/aes128-decrypt.asm
@@ -0,0 +1,163 @@
+C arm64/crypto/aes128-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes128-decrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+
+C AES decryption round of 4-blocks
+C AESD_ROUND_4B(KEY)
+define(`AESD_ROUND_4B', m4_assert_numargs(1)`
+ aesd S0.16b,$1.16b
+ aesimc S0.16b,S0.16b
+ aesd S1.16b,$1.16b
+ aesimc S1.16b,S1.16b
+ aesd S2.16b,$1.16b
+ aesimc S2.16b,S2.16b
+ aesd S3.16b,$1.16b
+ aesimc S3.16b,S3.16b
+')
+
+C AES last decryption round of 4-blocks
+C AESD_LAST_ROUND_4B(KEY, FINAL_KEY)
+define(`AESD_LAST_ROUND_4B', m4_assert_numargs(2)`
+ aesd S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+ aesd S1.16b,$1.16b
+ eor S1.16b,S1.16b,$2.16b
+ aesd S2.16b,$1.16b
+ eor S2.16b,S2.16b,$2.16b
+ aesd S3.16b,$1.16b
+ eor S3.16b,S3.16b,$2.16b
+')
+
+C AES decryption round of 1-block
+C AESD_ROUND_1B(KEY)
+define(`AESD_ROUND_1B', m4_assert_numargs(1)`
+ aesd S0.16b,$1.16b
+ aesimc S0.16b,S0.16b
+')
+
+C AES last decryption round of 1-block
+C AESD_LAST_ROUND_1B(KEY, FINAL_KEY)
+define(`AESD_LAST_ROUND_1B', m4_assert_numargs(2)`
+ aesd S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes128_decrypt(const struct aes128_ctx *ctx,
+C size_t length, uint8_t *dst,
+C const uint8_t *src)
+
+PROLOGUE(nettle_aes128_decrypt)
+ ands x4,LENGTH,#-64
+ b.eq L1B
+
+ mov x5,KEYS
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+ ld1 {K8.4s,K9.4s,K10.4s},[x5]
+
+L4B_loop:
+ ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+
+ AESD_ROUND_4B(K0)
+ AESD_ROUND_4B(K1)
+ AESD_ROUND_4B(K2)
+ AESD_ROUND_4B(K3)
+ AESD_ROUND_4B(K4)
+ AESD_ROUND_4B(K5)
+ AESD_ROUND_4B(K6)
+ AESD_ROUND_4B(K7)
+ AESD_ROUND_4B(K8)
+ AESD_LAST_ROUND_4B(K9,K10)
+
+ st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+ subs x4,x4,#64
+ b.ne L4B_loop
+
+ and LENGTH,LENGTH,#63
+
+L1B:
+ cbz LENGTH,Ldone
+
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+ ld1 {K8.4s,K9.4s,K10.4s},[KEYS]
+
+L1B_loop:
+ ld1 {S0.16b},[SRC],#16
+
+ AESD_ROUND_1B(K0)
+ AESD_ROUND_1B(K1)
+ AESD_ROUND_1B(K2)
+ AESD_ROUND_1B(K3)
+ AESD_ROUND_1B(K4)
+ AESD_ROUND_1B(K5)
+ AESD_ROUND_1B(K6)
+ AESD_ROUND_1B(K7)
+ AESD_ROUND_1B(K8)
+ AESD_LAST_ROUND_1B(K9,K10)
+
+ st1 {S0.16b},[DST],#16
+
+ subs LENGTH,LENGTH,#16
+ b.ne L1B_loop
+
+Ldone:
+ ret
+EPILOGUE(nettle_aes128_decrypt)
diff --git a/arm64/crypto/aes128-encrypt.asm b/arm64/crypto/aes128-encrypt.asm
new file mode 100644
index 00000000..b3c3b9ce
--- /dev/null
+++ b/arm64/crypto/aes128-encrypt.asm
@@ -0,0 +1,163 @@
+C arm64/crypto/aes128-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes128-encrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+
+C AES encryption round of 4-blocks
+C AESE_ROUND_4B(KEY)
+define(`AESE_ROUND_4B', m4_assert_numargs(1)`
+ aese S0.16b,$1.16b
+ aesmc S0.16b,S0.16b
+ aese S1.16b,$1.16b
+ aesmc S1.16b,S1.16b
+ aese S2.16b,$1.16b
+ aesmc S2.16b,S2.16b
+ aese S3.16b,$1.16b
+ aesmc S3.16b,S3.16b
+')
+
+C AES last encryption round of 4-blocks
+C AESE_LAST_ROUND_4B(KEY, FINAL_KEY)
+define(`AESE_LAST_ROUND_4B', m4_assert_numargs(2)`
+ aese S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+ aese S1.16b,$1.16b
+ eor S1.16b,S1.16b,$2.16b
+ aese S2.16b,$1.16b
+ eor S2.16b,S2.16b,$2.16b
+ aese S3.16b,$1.16b
+ eor S3.16b,S3.16b,$2.16b
+')
+
+C AES encryption round of 1-block
+C AESE_ROUND_1B(KEY)
+define(`AESE_ROUND_1B', m4_assert_numargs(1)`
+ aese S0.16b,$1.16b
+ aesmc S0.16b,S0.16b
+')
+
+C AES last encryption round of 1-block
+C AESE_LAST_ROUND_1B(KEY, FINAL_KEY)
+define(`AESE_LAST_ROUND_1B', m4_assert_numargs(2)`
+ aese S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes128_encrypt(const struct aes128_ctx *ctx,
+C size_t length, uint8_t *dst,
+C const uint8_t *src)
+
+PROLOGUE(nettle_aes128_encrypt)
+ ands x4,LENGTH,#-64
+ b.eq L1B
+
+ mov x5,KEYS
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+ ld1 {K8.4s,K9.4s,K10.4s},[x5]
+
+L4B_loop:
+ ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+
+ AESE_ROUND_4B(K0)
+ AESE_ROUND_4B(K1)
+ AESE_ROUND_4B(K2)
+ AESE_ROUND_4B(K3)
+ AESE_ROUND_4B(K4)
+ AESE_ROUND_4B(K5)
+ AESE_ROUND_4B(K6)
+ AESE_ROUND_4B(K7)
+ AESE_ROUND_4B(K8)
+ AESE_LAST_ROUND_4B(K9,K10)
+
+ st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+ subs x4,x4,#64
+ b.ne L4B_loop
+
+ and LENGTH,LENGTH,#63
+
+L1B:
+ cbz LENGTH,Ldone
+
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+ ld1 {K8.4s,K9.4s,K10.4s},[KEYS]
+
+L1B_loop:
+ ld1 {S0.16b},[SRC],#16
+
+ AESE_ROUND_1B(K0)
+ AESE_ROUND_1B(K1)
+ AESE_ROUND_1B(K2)
+ AESE_ROUND_1B(K3)
+ AESE_ROUND_1B(K4)
+ AESE_ROUND_1B(K5)
+ AESE_ROUND_1B(K6)
+ AESE_ROUND_1B(K7)
+ AESE_ROUND_1B(K8)
+ AESE_LAST_ROUND_1B(K9,K10)
+
+ st1 {S0.16b},[DST],#16
+
+ subs LENGTH,LENGTH,#16
+ b.ne L1B_loop
+
+Ldone:
+ ret
+EPILOGUE(nettle_aes128_encrypt)
diff --git a/arm64/crypto/aes192-decrypt.asm b/arm64/crypto/aes192-decrypt.asm
new file mode 100644
index 00000000..8d6bca79
--- /dev/null
+++ b/arm64/crypto/aes192-decrypt.asm
@@ -0,0 +1,171 @@
+C arm64/crypto/aes192-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes192-decrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+
+C AES decryption round of 4-blocks
+C AESD_ROUND_4B(KEY)
+define(`AESD_ROUND_4B', m4_assert_numargs(1)`
+ aesd S0.16b,$1.16b
+ aesimc S0.16b,S0.16b
+ aesd S1.16b,$1.16b
+ aesimc S1.16b,S1.16b
+ aesd S2.16b,$1.16b
+ aesimc S2.16b,S2.16b
+ aesd S3.16b,$1.16b
+ aesimc S3.16b,S3.16b
+')
+
+C AES last decryption round of 4-blocks
+C AESD_LAST_ROUND_4B(KEY, FINAL_KEY)
+define(`AESD_LAST_ROUND_4B', m4_assert_numargs(2)`
+ aesd S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+ aesd S1.16b,$1.16b
+ eor S1.16b,S1.16b,$2.16b
+ aesd S2.16b,$1.16b
+ eor S2.16b,S2.16b,$2.16b
+ aesd S3.16b,$1.16b
+ eor S3.16b,S3.16b,$2.16b
+')
+
+C AES decryption round of 1-block
+C AESD_ROUND_1B(KEY)
+define(`AESD_ROUND_1B', m4_assert_numargs(1)`
+ aesd S0.16b,$1.16b
+ aesimc S0.16b,S0.16b
+')
+
+C AES last decryption round of 1-block
+C AESD_LAST_ROUND_1B(KEY, FINAL_KEY)
+define(`AESD_LAST_ROUND_1B', m4_assert_numargs(2)`
+ aesd S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes192_decrypt(const struct aes192_ctx *ctx,
+C size_t length, uint8_t *dst,
+C const uint8_t *src)
+
+PROLOGUE(nettle_aes192_decrypt)
+ ands x4,LENGTH,#-64
+ b.eq L1B
+
+ mov x5,KEYS
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+ ld1 {K12.4s},[x5]
+
+L4B_loop:
+ ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+
+ AESD_ROUND_4B(K0)
+ AESD_ROUND_4B(K1)
+ AESD_ROUND_4B(K2)
+ AESD_ROUND_4B(K3)
+ AESD_ROUND_4B(K4)
+ AESD_ROUND_4B(K5)
+ AESD_ROUND_4B(K6)
+ AESD_ROUND_4B(K7)
+ AESD_ROUND_4B(K8)
+ AESD_ROUND_4B(K9)
+ AESD_ROUND_4B(K10)
+ AESD_LAST_ROUND_4B(K11,K12)
+
+ st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+ subs x4,x4,#64
+ b.ne L4B_loop
+
+ and LENGTH,LENGTH,#63
+
+L1B:
+ cbz LENGTH,Ldone
+
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+ ld1 {K12.4s},[KEYS]
+
+L1B_loop:
+ ld1 {S0.16b},[SRC],#16
+
+ AESD_ROUND_1B(K0)
+ AESD_ROUND_1B(K1)
+ AESD_ROUND_1B(K2)
+ AESD_ROUND_1B(K3)
+ AESD_ROUND_1B(K4)
+ AESD_ROUND_1B(K5)
+ AESD_ROUND_1B(K6)
+ AESD_ROUND_1B(K7)
+ AESD_ROUND_1B(K8)
+ AESD_ROUND_1B(K9)
+ AESD_ROUND_1B(K10)
+ AESD_LAST_ROUND_1B(K11,K12)
+
+ st1 {S0.16b},[DST],#16
+
+ subs LENGTH,LENGTH,#16
+ b.ne L1B_loop
+
+Ldone:
+ ret
+EPILOGUE(nettle_aes192_decrypt)
diff --git a/arm64/crypto/aes192-encrypt.asm b/arm64/crypto/aes192-encrypt.asm
new file mode 100644
index 00000000..5a71786d
--- /dev/null
+++ b/arm64/crypto/aes192-encrypt.asm
@@ -0,0 +1,171 @@
+C arm64/crypto/aes192-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes192-encrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+
+C AES encryption round of 4-blocks
+C AESE_ROUND_4B(KEY)
+define(`AESE_ROUND_4B', m4_assert_numargs(1)`
+ aese S0.16b,$1.16b
+ aesmc S0.16b,S0.16b
+ aese S1.16b,$1.16b
+ aesmc S1.16b,S1.16b
+ aese S2.16b,$1.16b
+ aesmc S2.16b,S2.16b
+ aese S3.16b,$1.16b
+ aesmc S3.16b,S3.16b
+')
+
+C AES last encryption round of 4-blocks
+C AESE_LAST_ROUND_4B(KEY, FINAL_KEY)
+define(`AESE_LAST_ROUND_4B', m4_assert_numargs(2)`
+ aese S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+ aese S1.16b,$1.16b
+ eor S1.16b,S1.16b,$2.16b
+ aese S2.16b,$1.16b
+ eor S2.16b,S2.16b,$2.16b
+ aese S3.16b,$1.16b
+ eor S3.16b,S3.16b,$2.16b
+')
+
+C AES encryption round of 1-block
+C AESE_ROUND_1B(KEY)
+define(`AESE_ROUND_1B', m4_assert_numargs(1)`
+ aese S0.16b,$1.16b
+ aesmc S0.16b,S0.16b
+')
+
+C AES last encryption round of 1-block
+C AESE_LAST_ROUND_1B(KEY, FINAL_KEY)
+define(`AESE_LAST_ROUND_1B', m4_assert_numargs(2)`
+ aese S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes192_encrypt(const struct aes192_ctx *ctx,
+C size_t length, uint8_t *dst,
+C const uint8_t *src)
+
+PROLOGUE(nettle_aes192_encrypt)
+ ands x4,LENGTH,#-64
+ b.eq L1B
+
+ mov x5,KEYS
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+ ld1 {K12.4s},[x5]
+
+L4B_loop:
+ ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+
+ AESE_ROUND_4B(K0)
+ AESE_ROUND_4B(K1)
+ AESE_ROUND_4B(K2)
+ AESE_ROUND_4B(K3)
+ AESE_ROUND_4B(K4)
+ AESE_ROUND_4B(K5)
+ AESE_ROUND_4B(K6)
+ AESE_ROUND_4B(K7)
+ AESE_ROUND_4B(K8)
+ AESE_ROUND_4B(K9)
+ AESE_ROUND_4B(K10)
+ AESE_LAST_ROUND_4B(K11,K12)
+
+ st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+ subs x4,x4,#64
+ b.ne L4B_loop
+
+ and LENGTH,LENGTH,#63
+
+L1B:
+ cbz LENGTH,Ldone
+
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+ ld1 {K12.4s},[KEYS]
+
+L1B_loop:
+ ld1 {S0.16b},[SRC],#16
+
+ AESE_ROUND_1B(K0)
+ AESE_ROUND_1B(K1)
+ AESE_ROUND_1B(K2)
+ AESE_ROUND_1B(K3)
+ AESE_ROUND_1B(K4)
+ AESE_ROUND_1B(K5)
+ AESE_ROUND_1B(K6)
+ AESE_ROUND_1B(K7)
+ AESE_ROUND_1B(K8)
+ AESE_ROUND_1B(K9)
+ AESE_ROUND_1B(K10)
+ AESE_LAST_ROUND_1B(K11,K12)
+
+ st1 {S0.16b},[DST],#16
+
+ subs LENGTH,LENGTH,#16
+ b.ne L1B_loop
+
+Ldone:
+ ret
+EPILOGUE(nettle_aes192_encrypt)
diff --git a/arm64/crypto/aes256-decrypt.asm b/arm64/crypto/aes256-decrypt.asm
new file mode 100644
index 00000000..6191d7b6
--- /dev/null
+++ b/arm64/crypto/aes256-decrypt.asm
@@ -0,0 +1,177 @@
+C arm64/crypto/aes256-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes256-decrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+define(`K13', `v29')
+define(`K14', `v30')
+
+C AES decryption round of 4-blocks
+C AESD_ROUND_4B(KEY)
+define(`AESD_ROUND_4B', m4_assert_numargs(1)`
+ aesd S0.16b,$1.16b
+ aesimc S0.16b,S0.16b
+ aesd S1.16b,$1.16b
+ aesimc S1.16b,S1.16b
+ aesd S2.16b,$1.16b
+ aesimc S2.16b,S2.16b
+ aesd S3.16b,$1.16b
+ aesimc S3.16b,S3.16b
+')
+
+C AES last decryption round of 4-blocks
+C AESD_LAST_ROUND_4B(KEY, FINAL_KEY)
+define(`AESD_LAST_ROUND_4B', m4_assert_numargs(2)`
+ aesd S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+ aesd S1.16b,$1.16b
+ eor S1.16b,S1.16b,$2.16b
+ aesd S2.16b,$1.16b
+ eor S2.16b,S2.16b,$2.16b
+ aesd S3.16b,$1.16b
+ eor S3.16b,S3.16b,$2.16b
+')
+
+C AES decryption round of 1-block
+C AESD_ROUND_1B(KEY)
+define(`AESD_ROUND_1B', m4_assert_numargs(1)`
+ aesd S0.16b,$1.16b
+ aesimc S0.16b,S0.16b
+')
+
+C AES last decryption round of 1-block
+C AESD_LAST_ROUND_1B(KEY, FINAL_KEY)
+define(`AESD_LAST_ROUND_1B', m4_assert_numargs(2)`
+ aesd S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes256_decrypt(const struct aes256_ctx *ctx,
+C size_t length, uint8_t *dst,
+C const uint8_t *src)
+
+PROLOGUE(nettle_aes256_decrypt)
+ ands x4,LENGTH,#-64
+ b.eq L1B
+
+ mov x5,KEYS
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+ ld1 {K12.4s,K13.4s,K14.4s},[x5]
+
+L4B_loop:
+ ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+
+ AESD_ROUND_4B(K0)
+ AESD_ROUND_4B(K1)
+ AESD_ROUND_4B(K2)
+ AESD_ROUND_4B(K3)
+ AESD_ROUND_4B(K4)
+ AESD_ROUND_4B(K5)
+ AESD_ROUND_4B(K6)
+ AESD_ROUND_4B(K7)
+ AESD_ROUND_4B(K8)
+ AESD_ROUND_4B(K9)
+ AESD_ROUND_4B(K10)
+ AESD_ROUND_4B(K11)
+ AESD_ROUND_4B(K12)
+ AESD_LAST_ROUND_4B(K13,K14)
+
+ st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+ subs x4,x4,#64
+ b.ne L4B_loop
+
+ and LENGTH,LENGTH,#63
+
+L1B:
+ cbz LENGTH,Ldone
+
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+ ld1 {K12.4s,K13.4s,K14.4s},[KEYS]
+
+L1B_loop:
+ ld1 {S0.16b},[SRC],#16
+
+ AESD_ROUND_1B(K0)
+ AESD_ROUND_1B(K1)
+ AESD_ROUND_1B(K2)
+ AESD_ROUND_1B(K3)
+ AESD_ROUND_1B(K4)
+ AESD_ROUND_1B(K5)
+ AESD_ROUND_1B(K6)
+ AESD_ROUND_1B(K7)
+ AESD_ROUND_1B(K8)
+ AESD_ROUND_1B(K9)
+ AESD_ROUND_1B(K10)
+ AESD_ROUND_1B(K11)
+ AESD_ROUND_1B(K12)
+ AESD_LAST_ROUND_1B(K13,K14)
+
+ st1 {S0.16b},[DST],#16
+
+ subs LENGTH,LENGTH,#16
+ b.ne L1B_loop
+
+Ldone:
+ ret
+EPILOGUE(nettle_aes256_decrypt)
diff --git a/arm64/crypto/aes256-encrypt.asm b/arm64/crypto/aes256-encrypt.asm
new file mode 100644
index 00000000..4a53c0c1
--- /dev/null
+++ b/arm64/crypto/aes256-encrypt.asm
@@ -0,0 +1,177 @@
+C arm64/crypto/aes256-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+.file "aes256-encrypt.asm"
+.arch armv8-a+crypto
+
+.text
+
+C Register usage:
+
+define(`KEYS', `x0')
+define(`LENGTH', `x1')
+define(`DST', `x2')
+define(`SRC', `x3')
+
+define(`S0', `v0')
+define(`S1', `v1')
+define(`S2', `v2')
+define(`S3', `v3')
+define(`K0', `v16')
+define(`K1', `v17')
+define(`K2', `v18')
+define(`K3', `v19')
+define(`K4', `v20')
+define(`K5', `v21')
+define(`K6', `v22')
+define(`K7', `v23')
+define(`K8', `v24')
+define(`K9', `v25')
+define(`K10', `v26')
+define(`K11', `v27')
+define(`K12', `v28')
+define(`K13', `v29')
+define(`K14', `v30')
+
+C AES encryption round of 4-blocks
+C AESE_ROUND_4B(KEY)
+define(`AESE_ROUND_4B', m4_assert_numargs(1)`
+ aese S0.16b,$1.16b
+ aesmc S0.16b,S0.16b
+ aese S1.16b,$1.16b
+ aesmc S1.16b,S1.16b
+ aese S2.16b,$1.16b
+ aesmc S2.16b,S2.16b
+ aese S3.16b,$1.16b
+ aesmc S3.16b,S3.16b
+')
+
+C AES last encryption round of 4-blocks
+C AESE_LAST_ROUND_4B(KEY, FINAL_KEY)
+define(`AESE_LAST_ROUND_4B', m4_assert_numargs(2)`
+ aese S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+ aese S1.16b,$1.16b
+ eor S1.16b,S1.16b,$2.16b
+ aese S2.16b,$1.16b
+ eor S2.16b,S2.16b,$2.16b
+ aese S3.16b,$1.16b
+ eor S3.16b,S3.16b,$2.16b
+')
+
+C AES encryption round of 1-block
+C AESE_ROUND_1B(KEY)
+define(`AESE_ROUND_1B', m4_assert_numargs(1)`
+ aese S0.16b,$1.16b
+ aesmc S0.16b,S0.16b
+')
+
+C AES last encryption round of 1-block
+C AESE_LAST_ROUND_1B(KEY, FINAL_KEY)
+define(`AESE_LAST_ROUND_1B', m4_assert_numargs(2)`
+ aese S0.16b,$1.16b
+ eor S0.16b,S0.16b,$2.16b
+')
+
+C void
+C aes256_encrypt(const struct aes256_ctx *ctx,
+C size_t length, uint8_t *dst,
+C const uint8_t *src)
+
+PROLOGUE(nettle_aes256_encrypt)
+ ands x4,LENGTH,#-64
+ b.eq L1B
+
+ mov x5,KEYS
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[x5],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[x5],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[x5],#64
+ ld1 {K12.4s,K13.4s,K14.4s},[x5]
+
+L4B_loop:
+ ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64
+
+ AESE_ROUND_4B(K0)
+ AESE_ROUND_4B(K1)
+ AESE_ROUND_4B(K2)
+ AESE_ROUND_4B(K3)
+ AESE_ROUND_4B(K4)
+ AESE_ROUND_4B(K5)
+ AESE_ROUND_4B(K6)
+ AESE_ROUND_4B(K7)
+ AESE_ROUND_4B(K8)
+ AESE_ROUND_4B(K9)
+ AESE_ROUND_4B(K10)
+ AESE_ROUND_4B(K11)
+ AESE_ROUND_4B(K12)
+ AESE_LAST_ROUND_4B(K13,K14)
+
+ st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64
+
+ subs x4,x4,#64
+ b.ne L4B_loop
+
+ and LENGTH,LENGTH,#63
+
+L1B:
+ cbz LENGTH,Ldone
+
+ ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64
+ ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64
+ ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64
+ ld1 {K12.4s,K13.4s,K14.4s},[KEYS]
+
+L1B_loop:
+ ld1 {S0.16b},[SRC],#16
+
+ AESE_ROUND_1B(K0)
+ AESE_ROUND_1B(K1)
+ AESE_ROUND_1B(K2)
+ AESE_ROUND_1B(K3)
+ AESE_ROUND_1B(K4)
+ AESE_ROUND_1B(K5)
+ AESE_ROUND_1B(K6)
+ AESE_ROUND_1B(K7)
+ AESE_ROUND_1B(K8)
+ AESE_ROUND_1B(K9)
+ AESE_ROUND_1B(K10)
+ AESE_ROUND_1B(K11)
+ AESE_ROUND_1B(K12)
+ AESE_LAST_ROUND_1B(K13,K14)
+
+ st1 {S0.16b},[DST],#16
+
+ subs LENGTH,LENGTH,#16
+ b.ne L1B_loop
+
+Ldone:
+ ret
+EPILOGUE(nettle_aes256_encrypt)
diff --git a/arm64/fat/aes128-decrypt-2.asm b/arm64/fat/aes128-decrypt-2.asm
new file mode 100644
index 00000000..ff336153
--- /dev/null
+++ b/arm64/fat/aes128-decrypt-2.asm
@@ -0,0 +1,36 @@
+C arm64/fat/aes128-decrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes128_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes128-decrypt.asm')
diff --git a/arm64/fat/aes128-encrypt-2.asm b/arm64/fat/aes128-encrypt-2.asm
new file mode 100644
index 00000000..68cce46d
--- /dev/null
+++ b/arm64/fat/aes128-encrypt-2.asm
@@ -0,0 +1,36 @@
+C arm64/fat/aes128-encrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes128_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes128-encrypt.asm')
diff --git a/arm64/fat/aes192-decrypt-2.asm b/arm64/fat/aes192-decrypt-2.asm
new file mode 100644
index 00000000..0937a06a
--- /dev/null
+++ b/arm64/fat/aes192-decrypt-2.asm
@@ -0,0 +1,36 @@
+C arm64/fat/aes192-decrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes192_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes192-decrypt.asm')
diff --git a/arm64/fat/aes192-encrypt-2.asm b/arm64/fat/aes192-encrypt-2.asm
new file mode 100644
index 00000000..cf93bc21
--- /dev/null
+++ b/arm64/fat/aes192-encrypt-2.asm
@@ -0,0 +1,36 @@
+C arm64/fat/aes192-encrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes192_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes192-encrypt.asm')
diff --git a/arm64/fat/aes256-decrypt-2.asm b/arm64/fat/aes256-decrypt-2.asm
new file mode 100644
index 00000000..43fc49aa
--- /dev/null
+++ b/arm64/fat/aes256-decrypt-2.asm
@@ -0,0 +1,36 @@
+C arm64/fat/aes256-decrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes256-decrypt.asm')
diff --git a/arm64/fat/aes256-encrypt-2.asm b/arm64/fat/aes256-encrypt-2.asm
new file mode 100644
index 00000000..26f4ac6d
--- /dev/null
+++ b/arm64/fat/aes256-encrypt-2.asm
@@ -0,0 +1,36 @@
+C arm64/fat/aes256-encrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_arm64')
+include_src(`arm64/crypto/aes256-encrypt.asm')
diff --git a/configure.ac b/configure.ac
index 44add732..1c34a2d9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -495,7 +495,7 @@ if test "x$enable_assembler" = xyes ; then
if test "x$enable_fat" = xyes ; then
asm_path="arm64/fat $asm_path"
OPT_NETTLE_SOURCES="fat-arm64.c $OPT_NETTLE_SOURCES"
- FAT_TEST_LIST="none pmull sha1 sha2"
+ FAT_TEST_LIST="none aes pmull sha1 sha2"
else
if test "$enable_arm64_crypto" = yes ; then
asm_path="arm64/crypto $asm_path"
diff --git a/fat-arm64.c b/fat-arm64.c
index 9bcb208a..fcb2ece8 100644
--- a/fat-arm64.c
+++ b/fat-arm64.c
@@ -50,6 +50,7 @@
#include "nettle-types.h"
+#include "aes.h"
#include "gcm.h"
#include "gcm-internal.h"
#include "fat-setup.h"
@@ -58,6 +59,9 @@
#ifndef HWCAP_ASIMD
#define HWCAP_ASIMD (1 << 1)
#endif
+#ifndef HWCAP_AES
+#define HWCAP_AES (1 << 3)
+#endif
#ifndef HWCAP_PMULL
#define HWCAP_PMULL (1 << 4)
#endif
@@ -70,6 +74,7 @@
struct arm64_features
{
+ int have_aes;
int have_pmull;
int have_sha1;
int have_sha2;
@@ -82,6 +87,7 @@ static void
get_arm64_features (struct arm64_features *features)
{
const char *s;
+ features->have_aes = 0;
features->have_pmull = 0;
features->have_sha1 = 0;
features->have_sha2 = 0;
@@ -93,7 +99,9 @@ get_arm64_features (struct arm64_features *features)
const char *sep = strchr (s, ',');
size_t length = sep ? (size_t) (sep - s) : strlen(s);
- if (MATCH (s, length, "pmull", 5))
+ if (MATCH (s, length, "aes", 3))
+ features->have_aes = 1;
+ else if (MATCH (s, length, "pmull", 5))
features->have_pmull = 1;
else if (MATCH (s, length, "sha1", 4))
features->have_sha1 = 1;
@@ -107,6 +115,8 @@ get_arm64_features (struct arm64_features *features)
{
#if USE_GETAUXVAL
unsigned long hwcap = getauxval(AT_HWCAP);
+ features->have_aes
+ = ((hwcap & (HWCAP_ASIMD | HWCAP_AES)) == (HWCAP_ASIMD | HWCAP_AES));
features->have_pmull
= ((hwcap & (HWCAP_ASIMD | HWCAP_PMULL)) == (HWCAP_ASIMD | HWCAP_PMULL));
features->have_sha1
@@ -117,6 +127,27 @@ get_arm64_features (struct arm64_features *features)
}
}
+DECLARE_FAT_FUNC(nettle_aes128_encrypt, aes128_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, arm64)
+DECLARE_FAT_FUNC(nettle_aes128_decrypt, aes128_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, arm64)
+
+DECLARE_FAT_FUNC(nettle_aes192_encrypt, aes192_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, arm64)
+DECLARE_FAT_FUNC(nettle_aes192_decrypt, aes192_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, arm64)
+
+DECLARE_FAT_FUNC(nettle_aes256_encrypt, aes256_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes256_encrypt, aes256_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes256_encrypt, aes256_crypt_func, arm64)
+DECLARE_FAT_FUNC(nettle_aes256_decrypt, aes256_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, arm64)
+
#if GCM_TABLE_BITS == 8
DECLARE_FAT_FUNC(_nettle_gcm_init_key, gcm_init_key_func)
DECLARE_FAT_FUNC_VAR(gcm_init_key, gcm_init_key_func, c)
@@ -145,11 +176,33 @@ fat_init (void)
verbose = getenv (ENV_VERBOSE) != NULL;
if (verbose)
- fprintf (stderr, "libnettle: cpu features:%s%s%s\n",
+ fprintf (stderr, "libnettle: cpu features:%s%s%s%s\n",
+ features.have_aes ? " aes instructions" : "",
features.have_pmull ? " polynomial multiply long instructions (PMULL/PMULL2)" : "",
features.have_sha1 ? " sha1 instructions" : "",
features.have_sha2 ? " sha2 instructions" : "");
+ if (features.have_aes)
+ {
+ if (verbose)
+ fprintf (stderr, "libnettle: enabling hardware accelerated AES encrypt/decrypt code.\n");
+ nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_arm64;
+ nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_arm64;
+ nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_arm64;
+ nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_arm64;
+ nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_arm64;
+ nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_arm64;
+ }
+ else
+ {
+ nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_c;
+ nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_c;
+ nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_c;
+ nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_c;
+ nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_c;
+ nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_c;
+ }
+
if (features.have_pmull)
{
if (verbose)
@@ -192,6 +245,33 @@ fat_init (void)
}
}
+DEFINE_FAT_FUNC(nettle_aes128_encrypt, void,
+ (const struct aes128_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes128_decrypt, void,
+ (const struct aes128_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(nettle_aes192_encrypt, void,
+ (const struct aes192_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes192_decrypt, void,
+ (const struct aes192_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(nettle_aes256_encrypt, void,
+ (const struct aes256_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes256_decrypt, void,
+ (const struct aes256_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
#if GCM_TABLE_BITS == 8
DEFINE_FAT_FUNC(_nettle_gcm_init_key, void,
(union nettle_block16 *table),