diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2019-04-15 22:09:24 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2019-04-15 22:09:32 +0300 |
commit | 0903b215ef5a18332b740a24e6e2bfbed9e1d97b (patch) | |
tree | 4b3a4a557563405f43061fe10106ccc3af049ca6 /cipher/twofish-amd64.S | |
parent | 2ffc689d4757f31f1e2c4961b94b0b0c8dc302b7 (diff) | |
download | libgcrypt-0903b215ef5a18332b740a24e6e2bfbed9e1d97b.tar.gz |
twofish-amd64: do not use xchg instruction
* cipher/twofish-amd64.S (g1g2_3): Swap ab and cd registers using
'movq' instructions instead of 'xchgq'.
--
Avoiding the 'xchg' instruction improves three-block parallel performance
by ~3% on Intel Haswell.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/twofish-amd64.S')
-rw-r--r-- | cipher/twofish-amd64.S | 12 |
1 file changed, 9 insertions, 3 deletions
```diff
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index 7a836463..134d6401 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -368,15 +368,21 @@ ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
 	/* G1,2 && G2,2 */ \
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
-	xchgq cd ## 0, ab ## 0; \
+	movq ab ## 0, RT0; \
+	movq cd ## 0, ab ## 0; \
+	movq RT0, cd ## 0; \
 	\
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
-	xchgq cd ## 1, ab ## 1; \
+	movq ab ## 1, RT0; \
+	movq cd ## 1, ab ## 1; \
+	movq RT0, cd ## 1; \
 	\
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
-	xchgq cd ## 2, ab ## 2;
+	movq ab ## 2, RT0; \
+	movq cd ## 2, ab ## 2; \
+	movq RT0, cd ## 2;
 
 #define enc_round_end(ab, x, y, n) \
 	addl y ## d, x ## d; \
```