diff options
author | Niels Möller <nisse@lysator.liu.se> | 2021-08-09 14:51:10 +0000 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2021-08-09 14:51:10 +0000 |
commit | d351a828579f1ffd0a837d68ed3c7f1c7d808f38 (patch) | |
tree | c13b1b7b8b2c7139b0827536b0af6e3b30f4c3b1 | |
parent | 76c7418c1ab8efb67dc42bbf8cbfa322b6ea1693 (diff) | |
parent | a47813c241804fa3b60f6b1cdf2f0569214fc827 (diff) | |
download | nettle-d351a828579f1ffd0a837d68ed3c7f1c7d808f38.tar.gz |
Merge branch 'arm64-aes' into 'master'
[AArch64] Optimize AES with fat build support
This patch optimizes the AES encrypt/decrypt functions by giving each key size its own implementation, which loads the key expansion just once in the function prologue. This yields a considerable performance increase over loading the key expansion for every block iteration. The patch also adds fat build support for the AES functions.
`make check` passes all tests. Benchmark results from running `examples/nettle-benchmark`:
| Algorithm | mode | C (Mbyte/s) | OpenSSL (Mbyte/s) | This patch (Mbyte/s) |
| ------ | ------ | ------ | ------ | ------ |
| aes128 | ECB encrypt | 95.01 | 1037.85 | 2579.62 |
| aes128 | ECB decrypt | 93.47 | 1005.15 | 2577.53 |
| aes192 | ECB encrypt | 79.60 | 893.34 | 2205.53 |
| aes192 | ECB decrypt | 78.34 | 889.17 | 2204.41 |
| aes256 | ECB encrypt | 66.64 | 782.21 | 1925.73 |
| aes256 | ECB decrypt | 65.81 | 781.37 | 1925.79 |
See merge request nettle/nettle!34
-rw-r--r-- | arm64/crypto/aes128-decrypt.asm | 118 | ||||
-rw-r--r-- | arm64/crypto/aes128-encrypt.asm | 118 | ||||
-rw-r--r-- | arm64/crypto/aes192-decrypt.asm | 125 | ||||
-rw-r--r-- | arm64/crypto/aes192-encrypt.asm | 125 | ||||
-rw-r--r-- | arm64/crypto/aes256-decrypt.asm | 131 | ||||
-rw-r--r-- | arm64/crypto/aes256-encrypt.asm | 131 | ||||
-rw-r--r-- | arm64/fat/aes128-decrypt-2.asm | 36 | ||||
-rw-r--r-- | arm64/fat/aes128-encrypt-2.asm | 36 | ||||
-rw-r--r-- | arm64/fat/aes192-decrypt-2.asm | 36 | ||||
-rw-r--r-- | arm64/fat/aes192-encrypt-2.asm | 36 | ||||
-rw-r--r-- | arm64/fat/aes256-decrypt-2.asm | 36 | ||||
-rw-r--r-- | arm64/fat/aes256-encrypt-2.asm | 36 | ||||
-rw-r--r-- | arm64/machine.m4 | 64 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | fat-arm64.c | 84 |
15 files changed, 1111 insertions, 3 deletions
diff --git a/arm64/crypto/aes128-decrypt.asm b/arm64/crypto/aes128-decrypt.asm new file mode 100644 index 00000000..aadfc480 --- /dev/null +++ b/arm64/crypto/aes128-decrypt.asm @@ -0,0 +1,118 @@ +C arm64/crypto/aes128-decrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +.file "aes128-decrypt.asm" +.arch armv8-a+crypto + +.text + +C Register usage: + +define(`KEYS', `x0') +define(`LENGTH', `x1') +define(`DST', `x2') +define(`SRC', `x3') + +define(`S0', `v0') +define(`S1', `v1') +define(`S2', `v2') +define(`S3', `v3') +define(`K0', `v16') +define(`K1', `v17') +define(`K2', `v18') +define(`K3', `v19') +define(`K4', `v20') +define(`K5', `v21') +define(`K6', `v22') +define(`K7', `v23') +define(`K8', `v24') +define(`K9', `v25') +define(`K10', `v26') + +C void +C aes128_decrypt(const struct aes128_ctx *ctx, +C size_t length, uint8_t *dst, +C const uint8_t *src) + +PROLOGUE(nettle_aes128_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s},[KEYS] + + ands x4,LENGTH,#-64 + b.eq L1B + +L4B_loop: + ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 + + AESD_ROUND_4B(S0,S1,S2,S3,K0) + AESD_ROUND_4B(S0,S1,S2,S3,K1) + AESD_ROUND_4B(S0,S1,S2,S3,K2) + AESD_ROUND_4B(S0,S1,S2,S3,K3) + AESD_ROUND_4B(S0,S1,S2,S3,K4) + AESD_ROUND_4B(S0,S1,S2,S3,K5) + AESD_ROUND_4B(S0,S1,S2,S3,K6) + AESD_ROUND_4B(S0,S1,S2,S3,K7) + AESD_ROUND_4B(S0,S1,S2,S3,K8) + AESD_LAST_ROUND_4B(S0,S1,S2,S3,K9,K10) + + st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64 + + subs x4,x4,#64 + b.ne L4B_loop + + and LENGTH,LENGTH,#63 + +L1B: + cbz LENGTH,Ldone + +L1B_loop: + ld1 {S0.16b},[SRC],#16 + + AESD_ROUND_1B(S0,K0) + AESD_ROUND_1B(S0,K1) + AESD_ROUND_1B(S0,K2) + AESD_ROUND_1B(S0,K3) + AESD_ROUND_1B(S0,K4) + AESD_ROUND_1B(S0,K5) + AESD_ROUND_1B(S0,K6) + AESD_ROUND_1B(S0,K7) + AESD_ROUND_1B(S0,K8) + AESD_LAST_ROUND_1B(S0,K9,K10) + + st1 {S0.16b},[DST],#16 + + subs LENGTH,LENGTH,#16 + b.ne L1B_loop + +Ldone: + ret +EPILOGUE(nettle_aes128_decrypt) diff --git a/arm64/crypto/aes128-encrypt.asm b/arm64/crypto/aes128-encrypt.asm new file mode 100644 index 00000000..3d9b9d90 --- /dev/null +++ b/arm64/crypto/aes128-encrypt.asm @@ -0,0 +1,118 @@ +C arm64/crypto/aes128-encrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + 
This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +.file "aes128-encrypt.asm" +.arch armv8-a+crypto + +.text + +C Register usage: + +define(`KEYS', `x0') +define(`LENGTH', `x1') +define(`DST', `x2') +define(`SRC', `x3') + +define(`S0', `v0') +define(`S1', `v1') +define(`S2', `v2') +define(`S3', `v3') +define(`K0', `v16') +define(`K1', `v17') +define(`K2', `v18') +define(`K3', `v19') +define(`K4', `v20') +define(`K5', `v21') +define(`K6', `v22') +define(`K7', `v23') +define(`K8', `v24') +define(`K9', `v25') +define(`K10', `v26') + +C void +C aes128_encrypt(const struct aes128_ctx *ctx, +C size_t length, uint8_t *dst, +C const uint8_t *src) + +PROLOGUE(nettle_aes128_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s},[KEYS] + + ands x4,LENGTH,#-64 + b.eq L1B + +L4B_loop: + ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 + + AESE_ROUND_4B(S0,S1,S2,S3,K0) + AESE_ROUND_4B(S0,S1,S2,S3,K1) + AESE_ROUND_4B(S0,S1,S2,S3,K2) + AESE_ROUND_4B(S0,S1,S2,S3,K3) + AESE_ROUND_4B(S0,S1,S2,S3,K4) + AESE_ROUND_4B(S0,S1,S2,S3,K5) + 
AESE_ROUND_4B(S0,S1,S2,S3,K6) + AESE_ROUND_4B(S0,S1,S2,S3,K7) + AESE_ROUND_4B(S0,S1,S2,S3,K8) + AESE_LAST_ROUND_4B(S0,S1,S2,S3,K9,K10) + + st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64 + + subs x4,x4,#64 + b.ne L4B_loop + + and LENGTH,LENGTH,#63 + +L1B: + cbz LENGTH,Ldone + +L1B_loop: + ld1 {S0.16b},[SRC],#16 + + AESE_ROUND_1B(S0,K0) + AESE_ROUND_1B(S0,K1) + AESE_ROUND_1B(S0,K2) + AESE_ROUND_1B(S0,K3) + AESE_ROUND_1B(S0,K4) + AESE_ROUND_1B(S0,K5) + AESE_ROUND_1B(S0,K6) + AESE_ROUND_1B(S0,K7) + AESE_ROUND_1B(S0,K8) + AESE_LAST_ROUND_1B(S0,K9,K10) + + st1 {S0.16b},[DST],#16 + + subs LENGTH,LENGTH,#16 + b.ne L1B_loop + +Ldone: + ret +EPILOGUE(nettle_aes128_encrypt) diff --git a/arm64/crypto/aes192-decrypt.asm b/arm64/crypto/aes192-decrypt.asm new file mode 100644 index 00000000..769edd15 --- /dev/null +++ b/arm64/crypto/aes192-decrypt.asm @@ -0,0 +1,125 @@ +C arm64/crypto/aes192-decrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +.file "aes192-decrypt.asm" +.arch armv8-a+crypto + +.text + +C Register usage: + +define(`KEYS', `x0') +define(`LENGTH', `x1') +define(`DST', `x2') +define(`SRC', `x3') + +define(`S0', `v0') +define(`S1', `v1') +define(`S2', `v2') +define(`S3', `v3') +define(`K0', `v16') +define(`K1', `v17') +define(`K2', `v18') +define(`K3', `v19') +define(`K4', `v20') +define(`K5', `v21') +define(`K6', `v22') +define(`K7', `v23') +define(`K8', `v24') +define(`K9', `v25') +define(`K10', `v26') +define(`K11', `v27') +define(`K12', `v28') + +C void +C aes192_decrypt(const struct aes192_ctx *ctx, +C size_t length, uint8_t *dst, +C const uint8_t *src) + +PROLOGUE(nettle_aes192_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s},[KEYS] + + ands x4,LENGTH,#-64 + b.eq L1B + +L4B_loop: + ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 + + AESD_ROUND_4B(S0,S1,S2,S3,K0) + AESD_ROUND_4B(S0,S1,S2,S3,K1) + AESD_ROUND_4B(S0,S1,S2,S3,K2) + AESD_ROUND_4B(S0,S1,S2,S3,K3) + AESD_ROUND_4B(S0,S1,S2,S3,K4) + AESD_ROUND_4B(S0,S1,S2,S3,K5) + AESD_ROUND_4B(S0,S1,S2,S3,K6) + AESD_ROUND_4B(S0,S1,S2,S3,K7) + AESD_ROUND_4B(S0,S1,S2,S3,K8) + AESD_ROUND_4B(S0,S1,S2,S3,K9) + AESD_ROUND_4B(S0,S1,S2,S3,K10) + AESD_LAST_ROUND_4B(S0,S1,S2,S3,K11,K12) + + st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64 + + subs x4,x4,#64 + b.ne L4B_loop + + and LENGTH,LENGTH,#63 + +L1B: + cbz LENGTH,Ldone + +L1B_loop: + ld1 {S0.16b},[SRC],#16 + + AESD_ROUND_1B(S0,K0) + AESD_ROUND_1B(S0,K1) + AESD_ROUND_1B(S0,K2) + AESD_ROUND_1B(S0,K3) + AESD_ROUND_1B(S0,K4) + AESD_ROUND_1B(S0,K5) + AESD_ROUND_1B(S0,K6) + AESD_ROUND_1B(S0,K7) + AESD_ROUND_1B(S0,K8) + AESD_ROUND_1B(S0,K9) + AESD_ROUND_1B(S0,K10) + AESD_LAST_ROUND_1B(S0,K11,K12) + + st1 {S0.16b},[DST],#16 + + subs LENGTH,LENGTH,#16 + b.ne L1B_loop + +Ldone: + ret +EPILOGUE(nettle_aes192_decrypt) diff --git a/arm64/crypto/aes192-encrypt.asm b/arm64/crypto/aes192-encrypt.asm new file 
mode 100644 index 00000000..5e57d4a4 --- /dev/null +++ b/arm64/crypto/aes192-encrypt.asm @@ -0,0 +1,125 @@ +C arm64/crypto/aes192-encrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +.file "aes192-encrypt.asm" +.arch armv8-a+crypto + +.text + +C Register usage: + +define(`KEYS', `x0') +define(`LENGTH', `x1') +define(`DST', `x2') +define(`SRC', `x3') + +define(`S0', `v0') +define(`S1', `v1') +define(`S2', `v2') +define(`S3', `v3') +define(`K0', `v16') +define(`K1', `v17') +define(`K2', `v18') +define(`K3', `v19') +define(`K4', `v20') +define(`K5', `v21') +define(`K6', `v22') +define(`K7', `v23') +define(`K8', `v24') +define(`K9', `v25') +define(`K10', `v26') +define(`K11', `v27') +define(`K12', `v28') + +C void +C aes192_encrypt(const struct aes192_ctx *ctx, +C size_t length, uint8_t *dst, +C const uint8_t *src) + +PROLOGUE(nettle_aes192_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s},[KEYS] + + ands x4,LENGTH,#-64 + b.eq L1B + +L4B_loop: + ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 + + AESE_ROUND_4B(S0,S1,S2,S3,K0) + AESE_ROUND_4B(S0,S1,S2,S3,K1) + AESE_ROUND_4B(S0,S1,S2,S3,K2) + AESE_ROUND_4B(S0,S1,S2,S3,K3) + AESE_ROUND_4B(S0,S1,S2,S3,K4) + AESE_ROUND_4B(S0,S1,S2,S3,K5) + AESE_ROUND_4B(S0,S1,S2,S3,K6) + AESE_ROUND_4B(S0,S1,S2,S3,K7) + AESE_ROUND_4B(S0,S1,S2,S3,K8) + AESE_ROUND_4B(S0,S1,S2,S3,K9) + AESE_ROUND_4B(S0,S1,S2,S3,K10) + AESE_LAST_ROUND_4B(S0,S1,S2,S3,K11,K12) + + st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64 + + subs x4,x4,#64 + b.ne L4B_loop + + and LENGTH,LENGTH,#63 + +L1B: + cbz LENGTH,Ldone + +L1B_loop: + ld1 {S0.16b},[SRC],#16 + + AESE_ROUND_1B(S0,K0) + AESE_ROUND_1B(S0,K1) + AESE_ROUND_1B(S0,K2) + AESE_ROUND_1B(S0,K3) + AESE_ROUND_1B(S0,K4) + AESE_ROUND_1B(S0,K5) + AESE_ROUND_1B(S0,K6) + AESE_ROUND_1B(S0,K7) + AESE_ROUND_1B(S0,K8) + AESE_ROUND_1B(S0,K9) + AESE_ROUND_1B(S0,K10) + AESE_LAST_ROUND_1B(S0,K11,K12) + + st1 {S0.16b},[DST],#16 + + subs LENGTH,LENGTH,#16 + b.ne L1B_loop + +Ldone: + ret +EPILOGUE(nettle_aes192_encrypt) diff --git a/arm64/crypto/aes256-decrypt.asm b/arm64/crypto/aes256-decrypt.asm new file 
mode 100644 index 00000000..d787a74a --- /dev/null +++ b/arm64/crypto/aes256-decrypt.asm @@ -0,0 +1,131 @@ +C arm64/crypto/aes256-decrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +.file "aes256-decrypt.asm" +.arch armv8-a+crypto + +.text + +C Register usage: + +define(`KEYS', `x0') +define(`LENGTH', `x1') +define(`DST', `x2') +define(`SRC', `x3') + +define(`S0', `v0') +define(`S1', `v1') +define(`S2', `v2') +define(`S3', `v3') +define(`K0', `v16') +define(`K1', `v17') +define(`K2', `v18') +define(`K3', `v19') +define(`K4', `v20') +define(`K5', `v21') +define(`K6', `v22') +define(`K7', `v23') +define(`K8', `v24') +define(`K9', `v25') +define(`K10', `v26') +define(`K11', `v27') +define(`K12', `v28') +define(`K13', `v29') +define(`K14', `v30') + +C void +C aes256_decrypt(const struct aes256_ctx *ctx, +C size_t length, uint8_t *dst, +C const uint8_t *src) + +PROLOGUE(nettle_aes256_decrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s,K13.4s,K14.4s},[KEYS] + + ands x4,LENGTH,#-64 + b.eq L1B + +L4B_loop: + ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 + + AESD_ROUND_4B(S0,S1,S2,S3,K0) + AESD_ROUND_4B(S0,S1,S2,S3,K1) + AESD_ROUND_4B(S0,S1,S2,S3,K2) + AESD_ROUND_4B(S0,S1,S2,S3,K3) + AESD_ROUND_4B(S0,S1,S2,S3,K4) + AESD_ROUND_4B(S0,S1,S2,S3,K5) + AESD_ROUND_4B(S0,S1,S2,S3,K6) + AESD_ROUND_4B(S0,S1,S2,S3,K7) + AESD_ROUND_4B(S0,S1,S2,S3,K8) + AESD_ROUND_4B(S0,S1,S2,S3,K9) + AESD_ROUND_4B(S0,S1,S2,S3,K10) + AESD_ROUND_4B(S0,S1,S2,S3,K11) + AESD_ROUND_4B(S0,S1,S2,S3,K12) + AESD_LAST_ROUND_4B(S0,S1,S2,S3,K13,K14) + + st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64 + + subs x4,x4,#64 + b.ne L4B_loop + + and LENGTH,LENGTH,#63 + +L1B: + cbz LENGTH,Ldone + +L1B_loop: + ld1 {S0.16b},[SRC],#16 + + AESD_ROUND_1B(S0,K0) + AESD_ROUND_1B(S0,K1) + AESD_ROUND_1B(S0,K2) + AESD_ROUND_1B(S0,K3) + AESD_ROUND_1B(S0,K4) + AESD_ROUND_1B(S0,K5) + AESD_ROUND_1B(S0,K6) + AESD_ROUND_1B(S0,K7) + AESD_ROUND_1B(S0,K8) + AESD_ROUND_1B(S0,K9) + AESD_ROUND_1B(S0,K10) + AESD_ROUND_1B(S0,K11) + AESD_ROUND_1B(S0,K12) + AESD_LAST_ROUND_1B(S0,K13,K14) + + st1 {S0.16b},[DST],#16 + + subs 
LENGTH,LENGTH,#16 + b.ne L1B_loop + +Ldone: + ret +EPILOGUE(nettle_aes256_decrypt) diff --git a/arm64/crypto/aes256-encrypt.asm b/arm64/crypto/aes256-encrypt.asm new file mode 100644 index 00000000..a6321b82 --- /dev/null +++ b/arm64/crypto/aes256-encrypt.asm @@ -0,0 +1,131 @@ +C arm64/crypto/aes256-encrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +.file "aes256-encrypt.asm" +.arch armv8-a+crypto + +.text + +C Register usage: + +define(`KEYS', `x0') +define(`LENGTH', `x1') +define(`DST', `x2') +define(`SRC', `x3') + +define(`S0', `v0') +define(`S1', `v1') +define(`S2', `v2') +define(`S3', `v3') +define(`K0', `v16') +define(`K1', `v17') +define(`K2', `v18') +define(`K3', `v19') +define(`K4', `v20') +define(`K5', `v21') +define(`K6', `v22') +define(`K7', `v23') +define(`K8', `v24') +define(`K9', `v25') +define(`K10', `v26') +define(`K11', `v27') +define(`K12', `v28') +define(`K13', `v29') +define(`K14', `v30') + +C void +C aes256_encrypt(const struct aes256_ctx *ctx, +C size_t length, uint8_t *dst, +C const uint8_t *src) + +PROLOGUE(nettle_aes256_encrypt) + ld1 {K0.4s,K1.4s,K2.4s,K3.4s},[KEYS],#64 + ld1 {K4.4s,K5.4s,K6.4s,K7.4s},[KEYS],#64 + ld1 {K8.4s,K9.4s,K10.4s,K11.4s},[KEYS],#64 + ld1 {K12.4s,K13.4s,K14.4s},[KEYS] + + ands x4,LENGTH,#-64 + b.eq L1B + +L4B_loop: + ld1 {S0.16b,S1.16b,S2.16b,S3.16b},[SRC],#64 + + AESE_ROUND_4B(S0,S1,S2,S3,K0) + AESE_ROUND_4B(S0,S1,S2,S3,K1) + AESE_ROUND_4B(S0,S1,S2,S3,K2) + AESE_ROUND_4B(S0,S1,S2,S3,K3) + AESE_ROUND_4B(S0,S1,S2,S3,K4) + AESE_ROUND_4B(S0,S1,S2,S3,K5) + AESE_ROUND_4B(S0,S1,S2,S3,K6) + AESE_ROUND_4B(S0,S1,S2,S3,K7) + AESE_ROUND_4B(S0,S1,S2,S3,K8) + AESE_ROUND_4B(S0,S1,S2,S3,K9) + AESE_ROUND_4B(S0,S1,S2,S3,K10) + AESE_ROUND_4B(S0,S1,S2,S3,K11) + AESE_ROUND_4B(S0,S1,S2,S3,K12) + AESE_LAST_ROUND_4B(S0,S1,S2,S3,K13,K14) + + st1 {S0.16b,S1.16b,S2.16b,S3.16b},[DST],#64 + + subs x4,x4,#64 + b.ne L4B_loop + + and LENGTH,LENGTH,#63 + +L1B: + cbz LENGTH,Ldone + +L1B_loop: + ld1 {S0.16b},[SRC],#16 + + AESE_ROUND_1B(S0,K0) + AESE_ROUND_1B(S0,K1) + AESE_ROUND_1B(S0,K2) + AESE_ROUND_1B(S0,K3) + AESE_ROUND_1B(S0,K4) + AESE_ROUND_1B(S0,K5) + AESE_ROUND_1B(S0,K6) + AESE_ROUND_1B(S0,K7) + AESE_ROUND_1B(S0,K8) + AESE_ROUND_1B(S0,K9) + AESE_ROUND_1B(S0,K10) + AESE_ROUND_1B(S0,K11) + AESE_ROUND_1B(S0,K12) + AESE_LAST_ROUND_1B(S0,K13,K14) + + st1 {S0.16b},[DST],#16 + + subs 
LENGTH,LENGTH,#16 + b.ne L1B_loop + +Ldone: + ret +EPILOGUE(nettle_aes256_encrypt) diff --git a/arm64/fat/aes128-decrypt-2.asm b/arm64/fat/aes128-decrypt-2.asm new file mode 100644 index 00000000..ff336153 --- /dev/null +++ b/arm64/fat/aes128-decrypt-2.asm @@ -0,0 +1,36 @@ +C arm64/fat/aes128-decrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_aes128_decrypt) picked up by configure + +define(`fat_transform', `_$1_arm64') +include_src(`arm64/crypto/aes128-decrypt.asm') diff --git a/arm64/fat/aes128-encrypt-2.asm b/arm64/fat/aes128-encrypt-2.asm new file mode 100644 index 00000000..68cce46d --- /dev/null +++ b/arm64/fat/aes128-encrypt-2.asm @@ -0,0 +1,36 @@ +C arm64/fat/aes128-encrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_aes128_encrypt) picked up by configure + +define(`fat_transform', `_$1_arm64') +include_src(`arm64/crypto/aes128-encrypt.asm') diff --git a/arm64/fat/aes192-decrypt-2.asm b/arm64/fat/aes192-decrypt-2.asm new file mode 100644 index 00000000..0937a06a --- /dev/null +++ b/arm64/fat/aes192-decrypt-2.asm @@ -0,0 +1,36 @@ +C arm64/fat/aes192-decrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_aes192_decrypt) picked up by configure + +define(`fat_transform', `_$1_arm64') +include_src(`arm64/crypto/aes192-decrypt.asm') diff --git a/arm64/fat/aes192-encrypt-2.asm b/arm64/fat/aes192-encrypt-2.asm new file mode 100644 index 00000000..cf93bc21 --- /dev/null +++ b/arm64/fat/aes192-encrypt-2.asm @@ -0,0 +1,36 @@ +C arm64/fat/aes192-encrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +dnl PROLOGUE(nettle_aes192_encrypt) picked up by configure + +define(`fat_transform', `_$1_arm64') +include_src(`arm64/crypto/aes192-encrypt.asm') diff --git a/arm64/fat/aes256-decrypt-2.asm b/arm64/fat/aes256-decrypt-2.asm new file mode 100644 index 00000000..43fc49aa --- /dev/null +++ b/arm64/fat/aes256-decrypt-2.asm @@ -0,0 +1,36 @@ +C arm64/fat/aes256-decrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_aes256_decrypt) picked up by configure + +define(`fat_transform', `_$1_arm64') +include_src(`arm64/crypto/aes256-decrypt.asm') diff --git a/arm64/fat/aes256-encrypt-2.asm b/arm64/fat/aes256-encrypt-2.asm new file mode 100644 index 00000000..26f4ac6d --- /dev/null +++ b/arm64/fat/aes256-encrypt-2.asm @@ -0,0 +1,36 @@ +C arm64/fat/aes256-encrypt.asm + +ifelse(` + Copyright (C) 2021 Mamone Tarsha + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_aes256_encrypt) picked up by configure + +define(`fat_transform', `_$1_arm64') +include_src(`arm64/crypto/aes256-encrypt.asm') diff --git a/arm64/machine.m4 b/arm64/machine.m4 index 7df62bcc..d47825ae 100644 --- a/arm64/machine.m4 +++ b/arm64/machine.m4 @@ -5,3 +5,67 @@ define(`SFP',``s'substr($1,1,len($1))') C Get 128-bit floating-point register from vector register C QFP(VR) define(`QFP',``q'substr($1,1,len($1))') + +C AES encryption round of 1-block +C AESE_ROUND_1B(BLOCK, KEY) +define(`AESE_ROUND_1B', m4_assert_numargs(2)` + aese $1.16b,$2.16b + aesmc $1.16b,$1.16b +') + +C AES last encryption round of 1-block +C AESE_LAST_ROUND_1B(BLOCK, KEY0, KEY1) +define(`AESE_LAST_ROUND_1B', m4_assert_numargs(3)` + aese $1.16b,$2.16b + eor $1.16b,$1.16b,$3.16b +') + +C AES decryption round of 1-block +C AESD_ROUND_1B(BLOCK, KEY) +define(`AESD_ROUND_1B', m4_assert_numargs(2)` + aesd $1.16b,$2.16b + aesimc $1.16b,$1.16b +') + +C AES last decryption round of 1-block +C AESD_LAST_ROUND_1B(BLOCK, KEY0, KEY1) +define(`AESD_LAST_ROUND_1B', m4_assert_numargs(3)` + aesd 
$1.16b,$2.16b + eor $1.16b,$1.16b,$3.16b +') + +C AES encryption round of 4-blocks +C AESE_ROUND_4B(BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY) +define(`AESE_ROUND_4B', m4_assert_numargs(5)` + AESE_ROUND_1B($1,$5) + AESE_ROUND_1B($2,$5) + AESE_ROUND_1B($3,$5) + AESE_ROUND_1B($4,$5) +') + +C AES last encryption round of 4-blocks +C AESE_LAST_ROUND_4B(BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY0, KEY1) +define(`AESE_LAST_ROUND_4B', m4_assert_numargs(6)` + AESE_LAST_ROUND_1B($1,$5,$6) + AESE_LAST_ROUND_1B($2,$5,$6) + AESE_LAST_ROUND_1B($3,$5,$6) + AESE_LAST_ROUND_1B($4,$5,$6) +') + +C AES decryption round of 4-blocks +C AESD_ROUND_4B(BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY) +define(`AESD_ROUND_4B', m4_assert_numargs(5)` + AESD_ROUND_1B($1,$5) + AESD_ROUND_1B($2,$5) + AESD_ROUND_1B($3,$5) + AESD_ROUND_1B($4,$5) +') + +C AES last decryption round of 4-blocks +C AESD_LAST_ROUND_4B(BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY0, KEY1) +define(`AESD_LAST_ROUND_4B', m4_assert_numargs(6)` + AESD_LAST_ROUND_1B($1,$5,$6) + AESD_LAST_ROUND_1B($2,$5,$6) + AESD_LAST_ROUND_1B($3,$5,$6) + AESD_LAST_ROUND_1B($4,$5,$6) +') diff --git a/configure.ac b/configure.ac index 546cd1b3..a307d936 100644 --- a/configure.ac +++ b/configure.ac @@ -499,7 +499,7 @@ if test "x$enable_assembler" = xyes ; then if test "x$enable_fat" = xyes ; then asm_path="arm64/fat $asm_path" OPT_NETTLE_SOURCES="fat-arm64.c $OPT_NETTLE_SOURCES" - FAT_TEST_LIST="none pmull sha1 sha2" + FAT_TEST_LIST="none aes pmull sha1 sha2" else if test "$enable_arm64_crypto" = yes ; then asm_path="arm64/crypto $asm_path" diff --git a/fat-arm64.c b/fat-arm64.c index 9bcb208a..fcb2ece8 100644 --- a/fat-arm64.c +++ b/fat-arm64.c @@ -50,6 +50,7 @@ #include "nettle-types.h" +#include "aes.h" #include "gcm.h" #include "gcm-internal.h" #include "fat-setup.h" @@ -58,6 +59,9 @@ #ifndef HWCAP_ASIMD #define HWCAP_ASIMD (1 << 1) #endif +#ifndef HWCAP_AES +#define HWCAP_AES (1 << 3) +#endif #ifndef HWCAP_PMULL #define HWCAP_PMULL (1 << 4) #endif @@ -70,6 +74,7 @@ struct 
arm64_features { + int have_aes; int have_pmull; int have_sha1; int have_sha2; @@ -82,6 +87,7 @@ static void get_arm64_features (struct arm64_features *features) { const char *s; + features->have_aes = 0; features->have_pmull = 0; features->have_sha1 = 0; features->have_sha2 = 0; @@ -93,7 +99,9 @@ get_arm64_features (struct arm64_features *features) const char *sep = strchr (s, ','); size_t length = sep ? (size_t) (sep - s) : strlen(s); - if (MATCH (s, length, "pmull", 5)) + if (MATCH (s, length, "aes", 3)) + features->have_aes = 1; + else if (MATCH (s, length, "pmull", 5)) features->have_pmull = 1; else if (MATCH (s, length, "sha1", 4)) features->have_sha1 = 1; @@ -107,6 +115,8 @@ get_arm64_features (struct arm64_features *features) { #if USE_GETAUXVAL unsigned long hwcap = getauxval(AT_HWCAP); + features->have_aes + = ((hwcap & (HWCAP_ASIMD | HWCAP_AES)) == (HWCAP_ASIMD | HWCAP_AES)); features->have_pmull = ((hwcap & (HWCAP_ASIMD | HWCAP_PMULL)) == (HWCAP_ASIMD | HWCAP_PMULL)); features->have_sha1 @@ -117,6 +127,27 @@ get_arm64_features (struct arm64_features *features) } } +DECLARE_FAT_FUNC(nettle_aes128_encrypt, aes128_crypt_func) +DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, c) +DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, arm64) +DECLARE_FAT_FUNC(nettle_aes128_decrypt, aes128_crypt_func) +DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, c) +DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, arm64) + +DECLARE_FAT_FUNC(nettle_aes192_encrypt, aes192_crypt_func) +DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, c) +DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, arm64) +DECLARE_FAT_FUNC(nettle_aes192_decrypt, aes192_crypt_func) +DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, c) +DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, arm64) + +DECLARE_FAT_FUNC(nettle_aes256_encrypt, aes256_crypt_func) +DECLARE_FAT_FUNC_VAR(aes256_encrypt, aes256_crypt_func, c) +DECLARE_FAT_FUNC_VAR(aes256_encrypt, 
aes256_crypt_func, arm64) +DECLARE_FAT_FUNC(nettle_aes256_decrypt, aes256_crypt_func) +DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, c) +DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, arm64) + #if GCM_TABLE_BITS == 8 DECLARE_FAT_FUNC(_nettle_gcm_init_key, gcm_init_key_func) DECLARE_FAT_FUNC_VAR(gcm_init_key, gcm_init_key_func, c) @@ -145,11 +176,33 @@ fat_init (void) verbose = getenv (ENV_VERBOSE) != NULL; if (verbose) - fprintf (stderr, "libnettle: cpu features:%s%s%s\n", + fprintf (stderr, "libnettle: cpu features:%s%s%s%s\n", + features.have_aes ? " aes instructions" : "", features.have_pmull ? " polynomial multiply long instructions (PMULL/PMULL2)" : "", features.have_sha1 ? " sha1 instructions" : "", features.have_sha2 ? " sha2 instructions" : ""); + if (features.have_aes) + { + if (verbose) + fprintf (stderr, "libnettle: enabling hardware accelerated AES encrypt/decrypt code.\n"); + nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_arm64; + nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_arm64; + nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_arm64; + nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_arm64; + nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_arm64; + nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_arm64; + } + else + { + nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_c; + nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_c; + nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_c; + nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_c; + nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_c; + nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_c; + } + if (features.have_pmull) { if (verbose) @@ -192,6 +245,33 @@ fat_init (void) } } +DEFINE_FAT_FUNC(nettle_aes128_encrypt, void, + (const struct aes128_ctx *ctx, size_t length, + uint8_t *dst,const uint8_t *src), + (ctx, length, dst, src)) +DEFINE_FAT_FUNC(nettle_aes128_decrypt, void, + (const struct aes128_ctx *ctx, size_t length, + uint8_t *dst,const uint8_t 
*src), + (ctx, length, dst, src)) + +DEFINE_FAT_FUNC(nettle_aes192_encrypt, void, + (const struct aes192_ctx *ctx, size_t length, + uint8_t *dst,const uint8_t *src), + (ctx, length, dst, src)) +DEFINE_FAT_FUNC(nettle_aes192_decrypt, void, + (const struct aes192_ctx *ctx, size_t length, + uint8_t *dst,const uint8_t *src), + (ctx, length, dst, src)) + +DEFINE_FAT_FUNC(nettle_aes256_encrypt, void, + (const struct aes256_ctx *ctx, size_t length, + uint8_t *dst,const uint8_t *src), + (ctx, length, dst, src)) +DEFINE_FAT_FUNC(nettle_aes256_decrypt, void, + (const struct aes256_ctx *ctx, size_t length, + uint8_t *dst,const uint8_t *src), + (ctx, length, dst, src)) + #if GCM_TABLE_BITS == 8 DEFINE_FAT_FUNC(_nettle_gcm_init_key, void, (union nettle_block16 *table), |