diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-01-07 16:04:40 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-01-11 20:10:12 +0200 |
commit | 859b6ac7fbdb6ec18d1536e14b9ee83c1add224e (patch) | |
tree | d96e1099461532141ae6084e63603ef07ea81068 | |
parent | f664333a4749eab03c6b19bae2f28863b8501a24 (diff) | |
download | libgcrypt-859b6ac7fbdb6ec18d1536e14b9ee83c1add224e.tar.gz |
Use 'vmov' and 'movi' for vector register clearing in ARM assembly
* cipher/chacha20-aarch64.S (clear): Use 'movi'.
* cipher/chacha20-armv7-neon.S (clear): Use 'vmov'.
* cipher/cipher-gcm-armv7-neon.S (clear): Use 'vmov'.
* cipher/cipher-gcm-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/cipher-gcm-armv8-aarch64-ce.S (CLEAR_REG): Use 'movi'.
* cipher/rijndael-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/sha1-armv7-neon.S (clear): Use 'vmov'.
* cipher/sha1-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/sha1-armv8-aarch64-ce.S (CLEAR_REG): Use 'movi'.
* cipher/sha256-armv8-aarch32-ce.S (CLEAR_REG): Use 'vmov'.
* cipher/sha256-armv8-aarch64-ce.S (CLEAR_REG): Use 'movi'.
* cipher/sha512-armv7-neon.S (CLEAR_REG): New using 'vmov'.
(_gcry_sha512_transform_armv7_neon): Use CLEAR_REG for clearing
registers.
--
Use 'vmov reg, #0' on 32-bit and 'movi reg.16b, #0' on 64-bit instead of
XORing a register with itself, to break false register dependencies.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | cipher/chacha20-aarch64.S | 2 | ||||
-rw-r--r-- | cipher/chacha20-armv7-neon.S | 2 | ||||
-rw-r--r-- | cipher/cipher-gcm-armv7-neon.S | 2 | ||||
-rw-r--r-- | cipher/cipher-gcm-armv8-aarch32-ce.S | 2 | ||||
-rw-r--r-- | cipher/cipher-gcm-armv8-aarch64-ce.S | 2 | ||||
-rw-r--r-- | cipher/rijndael-armv8-aarch32-ce.S | 2 | ||||
-rw-r--r-- | cipher/sha1-armv7-neon.S | 2 | ||||
-rw-r--r-- | cipher/sha1-armv8-aarch32-ce.S | 2 | ||||
-rw-r--r-- | cipher/sha1-armv8-aarch64-ce.S | 2 | ||||
-rw-r--r-- | cipher/sha256-armv8-aarch32-ce.S | 2 | ||||
-rw-r--r-- | cipher/sha256-armv8-aarch64-ce.S | 2 | ||||
-rw-r--r-- | cipher/sha512-armv7-neon.S | 26 |
12 files changed, 25 insertions, 23 deletions
diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index b8f9724a..4f76834b 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -110,7 +110,7 @@ vpunpcklqdq(x2, t2, x2); #define clear(x) \ - eor x.16b, x.16b, x.16b; + movi x.16b, #0; /********************************************************************** 4-way chacha20 diff --git a/cipher/chacha20-armv7-neon.S b/cipher/chacha20-armv7-neon.S index 33a43df1..a862be4e 100644 --- a/cipher/chacha20-armv7-neon.S +++ b/cipher/chacha20-armv7-neon.S @@ -132,7 +132,7 @@ vswp _q0##h, _q2##l; \ vswp _q1##h, _q3##l; -#define clear(x) veor x,x,x; +#define clear(x) vmov.i8 x, #0; /********************************************************************** 4-way chacha20 diff --git a/cipher/cipher-gcm-armv7-neon.S b/cipher/cipher-gcm-armv7-neon.S index a801a5e5..16502b4a 100644 --- a/cipher/cipher-gcm-armv7-neon.S +++ b/cipher/cipher-gcm-armv7-neon.S @@ -210,7 +210,7 @@ gcry_gcm_reduction_constant: /* Other functional macros */ -#define CLEAR_REG(reg) veor reg, reg; +#define CLEAR_REG(reg) vmov.i8 reg, #0; /* diff --git a/cipher/cipher-gcm-armv8-aarch32-ce.S b/cipher/cipher-gcm-armv8-aarch32-ce.S index 1de66a16..fb51b339 100644 --- a/cipher/cipher-gcm-armv8-aarch32-ce.S +++ b/cipher/cipher-gcm-armv8-aarch32-ce.S @@ -180,7 +180,7 @@ gcry_gcm_reduction_constant: /* Other functional macros */ -#define CLEAR_REG(reg) veor reg, reg; +#define CLEAR_REG(reg) vmov.i8 reg, #0; /* diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 877207d3..13ee83ed 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -149,7 +149,7 @@ gcry_gcm_reduction_constant: #define _(...) 
__VA_ARGS__ #define __ _() -#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; +#define CLEAR_REG(reg) movi reg.16b, #0; #define VPUSH_ABI \ stp d8, d9, [sp, #-16]!; \ diff --git a/cipher/rijndael-armv8-aarch32-ce.S b/cipher/rijndael-armv8-aarch32-ce.S index 6d78af0a..1eafa93e 100644 --- a/cipher/rijndael-armv8-aarch32-ce.S +++ b/cipher/rijndael-armv8-aarch32-ce.S @@ -249,7 +249,7 @@ /* Other functional macros */ -#define CLEAR_REG(reg) veor reg, reg; +#define CLEAR_REG(reg) vmov.i8 reg, #0; /* diff --git a/cipher/sha1-armv7-neon.S b/cipher/sha1-armv7-neon.S index 61cc541c..2de678b8 100644 --- a/cipher/sha1-armv7-neon.S +++ b/cipher/sha1-armv7-neon.S @@ -303,7 +303,7 @@ gcry_sha1_armv7_neon_K_VEC: /* Other functional macros */ -#define CLEAR_REG(reg) veor reg, reg; +#define CLEAR_REG(reg) vmov.i8 reg, #0; /* diff --git a/cipher/sha1-armv8-aarch32-ce.S b/cipher/sha1-armv8-aarch32-ce.S index bf2b233b..059b9a85 100644 --- a/cipher/sha1-armv8-aarch32-ce.S +++ b/cipher/sha1-armv8-aarch32-ce.S @@ -100,7 +100,7 @@ gcry_sha1_aarch32_ce_K_VEC: /* Other functional macros */ -#define CLEAR_REG(reg) veor reg, reg; +#define CLEAR_REG(reg) vmov.i8 reg, #0; /* diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index 223268ca..8ea1486b 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -88,7 +88,7 @@ gcry_sha1_aarch64_ce_K_VEC: /* Other functional macros */ -#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; +#define CLEAR_REG(reg) movi reg.16b, #0; /* diff --git a/cipher/sha256-armv8-aarch32-ce.S b/cipher/sha256-armv8-aarch32-ce.S index 2b17ab1b..95778b40 100644 --- a/cipher/sha256-armv8-aarch32-ce.S +++ b/cipher/sha256-armv8-aarch32-ce.S @@ -111,7 +111,7 @@ gcry_sha256_aarch32_ce_K: /* Other functional macros */ -#define CLEAR_REG(reg) veor reg, reg; +#define CLEAR_REG(reg) vmov.i8 reg, #0; /* diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index f57cae29..5c39e83e 100644 --- 
a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -98,7 +98,7 @@ gcry_sha256_aarch64_ce_K: /* Other functional macros */ -#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; +#define CLEAR_REG(reg) movi reg.16b, #0; /* diff --git a/cipher/sha512-armv7-neon.S b/cipher/sha512-armv7-neon.S index 6596f2cd..2b186b47 100644 --- a/cipher/sha512-armv7-neon.S +++ b/cipher/sha512-armv7-neon.S @@ -91,6 +91,8 @@ #define RW1213q q14 #define RW1415q q15 +#define CLEAR_REG(reg) vmov.i8 reg, #0; + /*********************************************************************** * ARM assembly implementation of sha512 transform ***********************************************************************/ @@ -426,22 +428,22 @@ _gcry_sha512_transform_armv7_neon: /* Clear used registers */ /* d16-d31 */ - veor.u64 RW01q, RW01q; - veor.u64 RW23q, RW23q; - veor.u64 RW45q, RW45q; - veor.u64 RW67q, RW67q; + CLEAR_REG(RW01q); + CLEAR_REG(RW23q); + CLEAR_REG(RW45q); + CLEAR_REG(RW67q); vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ - veor.u64 RW89q, RW89q; - veor.u64 RW1011q, RW1011q; - veor.u64 RW1213q, RW1213q; - veor.u64 RW1415q, RW1415q; + CLEAR_REG(RW89q); + CLEAR_REG(RW1011q); + CLEAR_REG(RW1213q); + CLEAR_REG(RW1415q); /* d8-d15 */ vpop {RT0-RT7}; /* d0-d7 (q0-q3) */ - veor.u64 %q0, %q0; - veor.u64 %q1, %q1; - veor.u64 %q2, %q2; - veor.u64 %q3, %q3; + CLEAR_REG(%q0); + CLEAR_REG(%q1); + CLEAR_REG(%q2); + CLEAR_REG(%q3); eor %r0, %r0; pop {%pc}; |