summary | refs | log | tree | commit | diff
path: root/cipher
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-01-17 20:23:01 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-01-19 18:32:28 +0200
commit: 9d62c54de2b0cd3b1849a27f8998e1f0d43f1583 (patch)
tree: 24a57f2916f454ff9f4b78d8e57a239e021ded12 /cipher
parent: 14137d685cf8d779ac4656b64b7d5adcdcf90e3a (diff)
download: libgcrypt-9d62c54de2b0cd3b1849a27f8998e1f0d43f1583.tar.gz
amd64-asm: align functions to 16 bytes for cipher algos
* cipher/blowfish-amd64.S: Align functions to 16 bytes.
* cipher/camellia-aesni-avx-amd64.S: Likewise.
* cipher/camellia-aesni-avx2-amd64.h: Likewise.
* cipher/camellia-gfni-avx512-amd64.S: Likewise.
* cipher/cast5-amd64.S: Likewise.
* cipher/chacha20-amd64-avx2.S: Likewise.
* cipher/chacha20-amd64-ssse3.S: Likewise.
* cipher/des-amd64.S: Likewise.
* cipher/rijndael-amd64.S: Likewise.
* cipher/rijndael-ssse3-amd64-asm.S: Likewise.
* cipher/salsa20-amd64.S: Likewise.
* cipher/serpent-avx2-amd64.S: Likewise.
* cipher/serpent-sse2-amd64.S: Likewise.
* cipher/sm4-aesni-avx-amd64.S: Likewise.
* cipher/sm4-aesni-avx2-amd64.S: Likewise.
* cipher/sm4-gfni-avx2-amd64.S: Likewise.
* cipher/twofish-amd64.S: Likewise.
* cipher/twofish-avx2-amd64.S: Likewise.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- cipher/blowfish-amd64.S 18
-rw-r--r-- cipher/camellia-aesni-avx-amd64.S 22
-rw-r--r-- cipher/camellia-aesni-avx2-amd64.h 20
-rw-r--r-- cipher/camellia-gfni-avx512-amd64.S 18
-rw-r--r-- cipher/cast5-amd64.S 14
-rw-r--r-- cipher/chacha20-amd64-avx2.S 4
-rw-r--r-- cipher/chacha20-amd64-ssse3.S 8
-rw-r--r-- cipher/des-amd64.S 10
-rw-r--r-- cipher/rijndael-amd64.S 4
-rw-r--r-- cipher/rijndael-ssse3-amd64-asm.S 2
-rw-r--r-- cipher/salsa20-amd64.S 6
-rw-r--r-- cipher/serpent-avx2-amd64.S 18
-rw-r--r-- cipher/serpent-sse2-amd64.S 18
-rw-r--r-- cipher/sm4-aesni-avx-amd64.S 20
-rw-r--r-- cipher/sm4-aesni-avx2-amd64.S 16
-rw-r--r-- cipher/sm4-gfni-avx2-amd64.S 24
-rw-r--r-- cipher/twofish-amd64.S 22
-rw-r--r-- cipher/twofish-avx2-amd64.S 18
18 files changed, 132 insertions, 130 deletions
diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 2b4ffa1a..95d57a99 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -123,7 +123,7 @@
bswapq RX0; \
movq RX0, (RIO);
-.align 8
+.align 16
ELF(.type __blowfish_enc_blk1,@function;)
__blowfish_enc_blk1:
@@ -155,7 +155,7 @@ __blowfish_enc_blk1:
CFI_ENDPROC();
ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
-.align 8
+.align 16
.globl _gcry_blowfish_amd64_do_encrypt
ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;)
@@ -186,7 +186,7 @@ _gcry_blowfish_amd64_do_encrypt:
CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
-.align 8
+.align 16
.globl _gcry_blowfish_amd64_encrypt_block
ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;)
@@ -214,7 +214,7 @@ _gcry_blowfish_amd64_encrypt_block:
CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
-.align 8
+.align 16
.globl _gcry_blowfish_amd64_decrypt_block
ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;)
@@ -342,7 +342,7 @@ ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_bloc
bswapq RX2; \
bswapq RX3;
-.align 8
+.align 16
ELF(.type __blowfish_enc_blk4,@function;)
__blowfish_enc_blk4:
@@ -371,7 +371,7 @@ __blowfish_enc_blk4:
CFI_ENDPROC();
ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
-.align 8
+.align 16
ELF(.type __blowfish_dec_blk4,@function;)
__blowfish_dec_blk4:
@@ -402,7 +402,7 @@ __blowfish_dec_blk4:
CFI_ENDPROC();
ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
-.align 8
+.align 16
.globl _gcry_blowfish_amd64_ctr_enc
ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;)
_gcry_blowfish_amd64_ctr_enc:
@@ -472,7 +472,7 @@ _gcry_blowfish_amd64_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_blowfish_amd64_cbc_dec
ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;)
_gcry_blowfish_amd64_cbc_dec:
@@ -533,7 +533,7 @@ _gcry_blowfish_amd64_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_blowfish_amd64_cfb_dec
ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;)
_gcry_blowfish_amd64_cfb_dec:
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 5c304e57..e15e445b 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -764,7 +764,7 @@
.long 0x0f0f0f0f
-.align 8
+.align 16
ELF(.type __camellia_enc_blk16,@function;)
__camellia_enc_blk16:
@@ -826,7 +826,7 @@ __camellia_enc_blk16:
CFI_ENDPROC();
ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
-.align 8
+.align 16
ELF(.type __camellia_dec_blk16,@function;)
__camellia_dec_blk16:
@@ -897,7 +897,7 @@ ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-.align 8
+.align 16
.globl _gcry_camellia_aesni_avx_ctr_enc
ELF(.type _gcry_camellia_aesni_avx_ctr_enc,@function;)
@@ -1025,7 +1025,7 @@ _gcry_camellia_aesni_avx_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_camellia_aesni_avx_cbc_dec
ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;)
@@ -1098,7 +1098,7 @@ _gcry_camellia_aesni_avx_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_camellia_aesni_avx_cfb_dec
ELF(.type _gcry_camellia_aesni_avx_cfb_dec,@function;)
@@ -1180,7 +1180,7 @@ _gcry_camellia_aesni_avx_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_camellia_aesni_avx_ocb_enc
ELF(.type _gcry_camellia_aesni_avx_ocb_enc,@function;)
@@ -1332,7 +1332,7 @@ _gcry_camellia_aesni_avx_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_camellia_aesni_avx_ocb_dec
ELF(.type _gcry_camellia_aesni_avx_ocb_dec,@function;)
@@ -1503,7 +1503,7 @@ _gcry_camellia_aesni_avx_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_camellia_aesni_avx_ocb_auth
ELF(.type _gcry_camellia_aesni_avx_ocb_auth,@function;)
@@ -1753,7 +1753,7 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
.long 0xB3E6C1FD, 0xB05688C2;
-.align 8
+.align 16
ELF(.type __camellia_avx_setup128,@function;)
__camellia_avx_setup128:
/* input:
@@ -2100,7 +2100,7 @@ __camellia_avx_setup128:
CFI_ENDPROC();
ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
-.align 8
+.align 16
ELF(.type __camellia_avx_setup256,@function;)
__camellia_avx_setup256:
@@ -2580,7 +2580,7 @@ __camellia_avx_setup256:
CFI_ENDPROC();
ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
-.align 8
+.align 16
.globl _gcry_camellia_aesni_avx_keygen
ELF(.type _gcry_camellia_aesni_avx_keygen,@function;)
diff --git a/cipher/camellia-aesni-avx2-amd64.h b/cipher/camellia-aesni-avx2-amd64.h
index 411e790f..b97cc2e3 100644
--- a/cipher/camellia-aesni-avx2-amd64.h
+++ b/cipher/camellia-aesni-avx2-amd64.h
@@ -997,7 +997,7 @@ ELF(.type FUNC_NAME(_constants),@object;)
ELF(.size FUNC_NAME(_constants),.-FUNC_NAME(_constants);)
-.align 8
+.align 16
ELF(.type FUNC_NAME(enc_blk32),@function;)
FUNC_NAME(enc_blk32):
@@ -1059,7 +1059,7 @@ FUNC_NAME(enc_blk32):
CFI_ENDPROC();
ELF(.size FUNC_NAME(enc_blk32),.-FUNC_NAME(enc_blk32);)
-.align 8
+.align 16
ELF(.type FUNC_NAME(dec_blk32),@function;)
FUNC_NAME(dec_blk32):
@@ -1130,7 +1130,7 @@ ELF(.size FUNC_NAME(dec_blk32),.-FUNC_NAME(dec_blk32);)
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-.align 8
+.align 16
.globl FUNC_NAME(ctr_enc)
ELF(.type FUNC_NAME(ctr_enc),@function;)
@@ -1325,7 +1325,7 @@ FUNC_NAME(ctr_enc):
CFI_ENDPROC();
ELF(.size FUNC_NAME(ctr_enc),.-FUNC_NAME(ctr_enc);)
-.align 8
+.align 16
.globl FUNC_NAME(cbc_dec)
ELF(.type FUNC_NAME(cbc_dec),@function;)
@@ -1400,7 +1400,7 @@ FUNC_NAME(cbc_dec):
CFI_ENDPROC();
ELF(.size FUNC_NAME(cbc_dec),.-FUNC_NAME(cbc_dec);)
-.align 8
+.align 16
.globl FUNC_NAME(cfb_dec)
ELF(.type FUNC_NAME(cfb_dec),@function;)
@@ -1482,7 +1482,7 @@ FUNC_NAME(cfb_dec):
CFI_ENDPROC();
ELF(.size FUNC_NAME(cfb_dec),.-FUNC_NAME(cfb_dec);)
-.align 8
+.align 16
.globl FUNC_NAME(ocb_enc)
ELF(.type FUNC_NAME(ocb_enc),@function;)
@@ -1654,7 +1654,7 @@ FUNC_NAME(ocb_enc):
CFI_ENDPROC();
ELF(.size FUNC_NAME(ocb_enc),.-FUNC_NAME(ocb_enc);)
-.align 8
+.align 16
.globl FUNC_NAME(ocb_dec)
ELF(.type FUNC_NAME(ocb_dec),@function;)
@@ -1849,7 +1849,7 @@ FUNC_NAME(ocb_dec):
CFI_ENDPROC();
ELF(.size FUNC_NAME(ocb_dec),.-FUNC_NAME(ocb_dec);)
-.align 8
+.align 16
.globl FUNC_NAME(ocb_auth)
ELF(.type FUNC_NAME(ocb_auth),@function;)
@@ -2018,7 +2018,7 @@ FUNC_NAME(ocb_auth):
CFI_ENDPROC();
ELF(.size FUNC_NAME(ocb_auth),.-FUNC_NAME(ocb_auth);)
-.align 8
+.align 16
.globl FUNC_NAME(enc_blk1_32)
ELF(.type FUNC_NAME(enc_blk1_32),@function;)
@@ -2126,7 +2126,7 @@ FUNC_NAME(enc_blk1_32):
CFI_ENDPROC();
ELF(.size FUNC_NAME(enc_blk1_32),.-FUNC_NAME(enc_blk1_32);)
-.align 8
+.align 16
.globl FUNC_NAME(dec_blk1_32)
ELF(.type FUNC_NAME(dec_blk1_32),@function;)
diff --git a/cipher/camellia-gfni-avx512-amd64.S b/cipher/camellia-gfni-avx512-amd64.S
index 14725b4a..66949d43 100644
--- a/cipher/camellia-gfni-avx512-amd64.S
+++ b/cipher/camellia-gfni-avx512-amd64.S
@@ -691,7 +691,7 @@ ELF(.type _gcry_camellia_gfni_avx512__constants,@object;)
ELF(.size _gcry_camellia_gfni_avx512__constants,.-_gcry_camellia_gfni_avx512__constants;)
-.align 8
+.align 16
ELF(.type __camellia_gfni_avx512_enc_blk64,@function;)
__camellia_gfni_avx512_enc_blk64:
@@ -751,7 +751,7 @@ __camellia_gfni_avx512_enc_blk64:
CFI_ENDPROC();
ELF(.size __camellia_gfni_avx512_enc_blk64,.-__camellia_gfni_avx512_enc_blk64;)
-.align 8
+.align 16
ELF(.type __camellia_gfni_avx512_dec_blk64,@function;)
__camellia_gfni_avx512_dec_blk64:
@@ -820,7 +820,7 @@ ELF(.size __camellia_gfni_avx512_dec_blk64,.-__camellia_gfni_avx512_dec_blk64;)
kaddb %k1, %k1, %k1; \
vpaddq hi_counter1, out, out{%k1};
-.align 8
+.align 16
.globl _gcry_camellia_gfni_avx512_ctr_enc
ELF(.type _gcry_camellia_gfni_avx512_ctr_enc,@function;)
@@ -973,7 +973,7 @@ _gcry_camellia_gfni_avx512_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_camellia_gfni_avx512_ctr_enc,.-_gcry_camellia_gfni_avx512_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_camellia_gfni_avx512_cbc_dec
ELF(.type _gcry_camellia_gfni_avx512_cbc_dec,@function;)
@@ -1035,7 +1035,7 @@ _gcry_camellia_gfni_avx512_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_camellia_gfni_avx512_cbc_dec,.-_gcry_camellia_gfni_avx512_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_camellia_gfni_avx512_cfb_dec
ELF(.type _gcry_camellia_gfni_avx512_cfb_dec,@function;)
@@ -1108,7 +1108,7 @@ _gcry_camellia_gfni_avx512_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_camellia_gfni_avx512_cfb_dec,.-_gcry_camellia_gfni_avx512_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_camellia_gfni_avx512_ocb_enc
ELF(.type _gcry_camellia_gfni_avx512_ocb_enc,@function;)
@@ -1271,7 +1271,7 @@ _gcry_camellia_gfni_avx512_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_camellia_gfni_avx512_ocb_enc,.-_gcry_camellia_gfni_avx512_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_camellia_gfni_avx512_ocb_dec
ELF(.type _gcry_camellia_gfni_avx512_ocb_dec,@function;)
@@ -1440,7 +1440,7 @@ _gcry_camellia_gfni_avx512_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_camellia_gfni_avx512_ocb_dec,.-_gcry_camellia_gfni_avx512_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_camellia_gfni_avx512_enc_blk64
ELF(.type _gcry_camellia_gfni_avx512_enc_blk64,@function;)
@@ -1504,7 +1504,7 @@ _gcry_camellia_gfni_avx512_enc_blk64:
CFI_ENDPROC();
ELF(.size _gcry_camellia_gfni_avx512_enc_blk64,.-_gcry_camellia_gfni_avx512_enc_blk64;)
-.align 8
+.align 16
.globl _gcry_camellia_gfni_avx512_dec_blk64
ELF(.type _gcry_camellia_gfni_avx512_dec_blk64,@function;)
diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index a804654c..b8ae8ba0 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -173,7 +173,7 @@
rorq $32, RLR0; \
movq RLR0, (RIO);
-.align 8
+.align 16
.globl _gcry_cast5_amd64_encrypt_block
ELF(.type _gcry_cast5_amd64_encrypt_block,@function;)
@@ -223,7 +223,7 @@ _gcry_cast5_amd64_encrypt_block:
CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
-.align 8
+.align 16
.globl _gcry_cast5_amd64_decrypt_block
ELF(.type _gcry_cast5_amd64_decrypt_block,@function;)
@@ -373,7 +373,7 @@ ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
rorq $32, c; \
rorq $32, d;
-.align 8
+.align 16
ELF(.type __cast5_enc_blk4,@function;)
__cast5_enc_blk4:
@@ -403,7 +403,7 @@ __cast5_enc_blk4:
CFI_ENDPROC();
ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
-.align 8
+.align 16
ELF(.type __cast5_dec_blk4,@function;)
__cast5_dec_blk4:
@@ -435,7 +435,7 @@ __cast5_dec_blk4:
ret_spec_stop;
ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
-.align 8
+.align 16
.globl _gcry_cast5_amd64_ctr_enc
ELF(.type _gcry_cast5_amd64_ctr_enc,@function;)
_gcry_cast5_amd64_ctr_enc:
@@ -512,7 +512,7 @@ _gcry_cast5_amd64_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_cast5_amd64_cbc_dec
ELF(.type _gcry_cast5_amd64_cbc_dec,@function;)
_gcry_cast5_amd64_cbc_dec:
@@ -586,7 +586,7 @@ _gcry_cast5_amd64_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_cast5_amd64_cfb_dec
ELF(.type _gcry_cast5_amd64_cfb_dec,@function;)
_gcry_cast5_amd64_cfb_dec:
diff --git a/cipher/chacha20-amd64-avx2.S b/cipher/chacha20-amd64-avx2.S
index 9f2a036a..407d651f 100644
--- a/cipher/chacha20-amd64-avx2.S
+++ b/cipher/chacha20-amd64-avx2.S
@@ -168,7 +168,7 @@ chacha20_data:
.Lunsigned_cmp:
.long 0x80000000
-.align 8
+.align 16
.globl _gcry_chacha20_amd64_avx2_blocks8
ELF(.type _gcry_chacha20_amd64_avx2_blocks8,@function;)
@@ -333,7 +333,7 @@ ELF(.size _gcry_chacha20_amd64_avx2_blocks8,
#define _ /*_*/
-.align 8
+.align 16
.globl _gcry_chacha20_poly1305_amd64_avx2_blocks8
ELF(.type _gcry_chacha20_poly1305_amd64_avx2_blocks8,@function;)
diff --git a/cipher/chacha20-amd64-ssse3.S b/cipher/chacha20-amd64-ssse3.S
index 6c737978..452d42e5 100644
--- a/cipher/chacha20-amd64-ssse3.S
+++ b/cipher/chacha20-amd64-ssse3.S
@@ -164,7 +164,7 @@ chacha20_data:
.Lunsigned_cmp:
.long 0x80000000,0x80000000,0x80000000,0x80000000
-.align 8
+.align 16
.globl _gcry_chacha20_amd64_ssse3_blocks4
ELF(.type _gcry_chacha20_amd64_ssse3_blocks4,@function;)
@@ -366,7 +366,7 @@ ELF(.size _gcry_chacha20_amd64_ssse3_blocks4,
ROTATE(x1, 7, tmp1); \
WORD_SHUF(x1, shuf_x1);
-.align 8
+.align 16
.globl _gcry_chacha20_amd64_ssse3_blocks1
ELF(.type _gcry_chacha20_amd64_ssse3_blocks1,@function;)
@@ -513,7 +513,7 @@ ELF(.size _gcry_chacha20_amd64_ssse3_blocks1,
#define _ /*_*/
-.align 8
+.align 16
.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks4
ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks4,@function;)
@@ -781,7 +781,7 @@ ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4,
2-way && 1-way stitched chacha20-poly1305
**********************************************************************/
-.align 8
+.align 16
.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks1
ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks1,@function;)
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index c1bf9f29..51e40258 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
@@ -180,7 +180,7 @@
movl left##d, (io); \
movl right##d, 4(io);
-.align 8
+.align 16
.globl _gcry_3des_amd64_crypt_block
ELF(.type _gcry_3des_amd64_crypt_block,@function;)
@@ -473,7 +473,7 @@ ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
movl left##d, (io); \
movl right##d, 4(io);
-.align 8
+.align 16
ELF(.type _gcry_3des_amd64_crypt_blk3,@function;)
_gcry_3des_amd64_crypt_blk3:
/* input:
@@ -548,7 +548,7 @@ _gcry_3des_amd64_crypt_blk3:
CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
-.align 8
+.align 16
.globl _gcry_3des_amd64_cbc_dec
ELF(.type _gcry_3des_amd64_cbc_dec,@function;)
_gcry_3des_amd64_cbc_dec:
@@ -646,7 +646,7 @@ _gcry_3des_amd64_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_3des_amd64_ctr_enc
ELF(.type _gcry_3des_amd64_ctr_enc,@function;)
_gcry_3des_amd64_ctr_enc:
@@ -744,7 +744,7 @@ _gcry_3des_amd64_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_3des_amd64_cfb_dec
ELF(.type _gcry_3des_amd64_cfb_dec,@function;)
_gcry_3des_amd64_cfb_dec:
diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S
index 6e3cc819..526c2b7b 100644
--- a/cipher/rijndael-amd64.S
+++ b/cipher/rijndael-amd64.S
@@ -200,7 +200,7 @@
#define lastencround(round) \
do_lastencround((round) + 1);
-.align 8
+.align 16
.globl _gcry_aes_amd64_encrypt_block
ELF(.type _gcry_aes_amd64_encrypt_block,@function;)
@@ -377,7 +377,7 @@ ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
#define lastdecround(round) \
do_lastdecround(round);
-.align 8
+.align 16
.globl _gcry_aes_amd64_decrypt_block
ELF(.type _gcry_aes_amd64_decrypt_block,@function;)
diff --git a/cipher/rijndael-ssse3-amd64-asm.S b/cipher/rijndael-ssse3-amd64-asm.S
index b98dca26..0c5c8f46 100644
--- a/cipher/rijndael-ssse3-amd64-asm.S
+++ b/cipher/rijndael-ssse3-amd64-asm.S
@@ -47,6 +47,7 @@
##
## _gcry_aes_ssse3_enc_preload
##
+.align 16
ELF(.type _gcry_aes_ssse3_enc_preload,@function)
.globl _gcry_aes_ssse3_enc_preload
_gcry_aes_ssse3_enc_preload:
@@ -68,6 +69,7 @@ ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
##
## _gcry_aes_ssse3_dec_preload
##
+.align 16
ELF(.type _gcry_aes_ssse3_dec_preload,@function)
.globl _gcry_aes_ssse3_dec_preload
_gcry_aes_ssse3_dec_preload:
diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index 64626063..6efb75e0 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
@@ -32,7 +32,7 @@
.text
-.align 8
+.align 16
.globl _gcry_salsa20_amd64_keysetup
ELF(.type _gcry_salsa20_amd64_keysetup,@function;)
_gcry_salsa20_amd64_keysetup:
@@ -86,7 +86,7 @@ _gcry_salsa20_amd64_keysetup:
ret_spec_stop
CFI_ENDPROC();
-.align 8
+.align 16
.globl _gcry_salsa20_amd64_ivsetup
ELF(.type _gcry_salsa20_amd64_ivsetup,@function;)
_gcry_salsa20_amd64_ivsetup:
@@ -102,7 +102,7 @@ _gcry_salsa20_amd64_ivsetup:
ret_spec_stop
CFI_ENDPROC();
-.align 8
+.align 16
.globl _gcry_salsa20_amd64_encrypt_blocks
ELF(.type _gcry_salsa20_amd64_encrypt_blocks,@function;)
_gcry_salsa20_amd64_encrypt_blocks:
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 54ff61e4..26a21a36 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -401,7 +401,7 @@
.text
-.align 8
+.align 16
ELF(.type __serpent_enc_blk16,@function;)
__serpent_enc_blk16:
/* input:
@@ -491,7 +491,7 @@ __serpent_enc_blk16:
CFI_ENDPROC();
ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
-.align 8
+.align 16
ELF(.type __serpent_dec_blk16,@function;)
__serpent_dec_blk16:
/* input:
@@ -583,7 +583,7 @@ __serpent_dec_blk16:
CFI_ENDPROC();
ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
-.align 8
+.align 16
.globl _gcry_serpent_avx2_blk16
ELF(.type _gcry_serpent_avx2_blk16,@function;)
_gcry_serpent_avx2_blk16:
@@ -639,7 +639,7 @@ ELF(.size _gcry_serpent_avx2_blk16,.-_gcry_serpent_avx2_blk16;)
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-.align 8
+.align 16
.globl _gcry_serpent_avx2_ctr_enc
ELF(.type _gcry_serpent_avx2_ctr_enc,@function;)
_gcry_serpent_avx2_ctr_enc:
@@ -751,7 +751,7 @@ _gcry_serpent_avx2_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_serpent_avx2_cbc_dec
ELF(.type _gcry_serpent_avx2_cbc_dec,@function;)
_gcry_serpent_avx2_cbc_dec:
@@ -804,7 +804,7 @@ _gcry_serpent_avx2_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_serpent_avx2_cfb_dec
ELF(.type _gcry_serpent_avx2_cfb_dec,@function;)
_gcry_serpent_avx2_cfb_dec:
@@ -859,7 +859,7 @@ _gcry_serpent_avx2_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_serpent_avx2_ocb_enc
ELF(.type _gcry_serpent_avx2_ocb_enc,@function;)
@@ -973,7 +973,7 @@ _gcry_serpent_avx2_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_serpent_avx2_ocb_dec
ELF(.type _gcry_serpent_avx2_ocb_dec,@function;)
@@ -1097,7 +1097,7 @@ _gcry_serpent_avx2_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_serpent_avx2_ocb_auth
ELF(.type _gcry_serpent_avx2_ocb_auth,@function;)
diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S
index 01723a2a..885c2bf1 100644
--- a/cipher/serpent-sse2-amd64.S
+++ b/cipher/serpent-sse2-amd64.S
@@ -423,7 +423,7 @@
.text
-.align 8
+.align 16
ELF(.type __serpent_enc_blk8,@function;)
__serpent_enc_blk8:
/* input:
@@ -513,7 +513,7 @@ __serpent_enc_blk8:
CFI_ENDPROC();
ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
-.align 8
+.align 16
ELF(.type __serpent_dec_blk8,@function;)
__serpent_dec_blk8:
/* input:
@@ -605,7 +605,7 @@ __serpent_dec_blk8:
CFI_ENDPROC();
ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
-.align 8
+.align 16
.globl _gcry_serpent_sse2_blk8
ELF(.type _gcry_serpent_sse2_blk8,@function;)
_gcry_serpent_sse2_blk8:
@@ -670,7 +670,7 @@ _gcry_serpent_sse2_blk8:
CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_blk8,.-_gcry_serpent_sse2_blk8;)
-.align 8
+.align 16
.globl _gcry_serpent_sse2_ctr_enc
ELF(.type _gcry_serpent_sse2_ctr_enc,@function;)
_gcry_serpent_sse2_ctr_enc:
@@ -802,7 +802,7 @@ _gcry_serpent_sse2_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_serpent_sse2_cbc_dec
ELF(.type _gcry_serpent_sse2_cbc_dec,@function;)
_gcry_serpent_sse2_cbc_dec:
@@ -865,7 +865,7 @@ _gcry_serpent_sse2_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_serpent_sse2_cfb_dec
ELF(.type _gcry_serpent_sse2_cfb_dec,@function;)
_gcry_serpent_sse2_cfb_dec:
@@ -931,7 +931,7 @@ _gcry_serpent_sse2_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_serpent_sse2_ocb_enc
ELF(.type _gcry_serpent_sse2_ocb_enc,@function;)
@@ -1045,7 +1045,7 @@ _gcry_serpent_sse2_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_serpent_sse2_ocb_dec
ELF(.type _gcry_serpent_sse2_ocb_dec,@function;)
@@ -1169,7 +1169,7 @@ _gcry_serpent_sse2_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_serpent_sse2_ocb_auth
ELF(.type _gcry_serpent_sse2_ocb_auth,@function;)
diff --git a/cipher/sm4-aesni-avx-amd64.S b/cipher/sm4-aesni-avx-amd64.S
index 7a99e070..88f6e5c5 100644
--- a/cipher/sm4-aesni-avx-amd64.S
+++ b/cipher/sm4-aesni-avx-amd64.S
@@ -152,7 +152,7 @@
.L0f0f0f0f:
.long 0x0f0f0f0f
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_expand_key
ELF(.type _gcry_sm4_aesni_avx_expand_key,@function;)
_gcry_sm4_aesni_avx_expand_key:
@@ -244,7 +244,7 @@ _gcry_sm4_aesni_avx_expand_key:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx_expand_key,.-_gcry_sm4_aesni_avx_expand_key;)
-.align 8
+.align 16
ELF(.type sm4_aesni_avx_crypt_blk1_4,@function;)
sm4_aesni_avx_crypt_blk1_4:
/* input:
@@ -349,7 +349,7 @@ sm4_aesni_avx_crypt_blk1_4:
CFI_ENDPROC();
ELF(.size sm4_aesni_avx_crypt_blk1_4,.-sm4_aesni_avx_crypt_blk1_4;)
-.align 8
+.align 16
ELF(.type __sm4_crypt_blk8,@function;)
__sm4_crypt_blk8:
/* input:
@@ -458,7 +458,7 @@ __sm4_crypt_blk8:
CFI_ENDPROC();
ELF(.size __sm4_crypt_blk8,.-__sm4_crypt_blk8;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_crypt_blk1_8
ELF(.type _gcry_sm4_aesni_avx_crypt_blk1_8,@function;)
_gcry_sm4_aesni_avx_crypt_blk1_8:
@@ -512,7 +512,7 @@ _gcry_sm4_aesni_avx_crypt_blk1_8:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx_crypt_blk1_8,.-_gcry_sm4_aesni_avx_crypt_blk1_8;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_ctr_enc
ELF(.type _gcry_sm4_aesni_avx_ctr_enc,@function;)
_gcry_sm4_aesni_avx_ctr_enc:
@@ -586,7 +586,7 @@ _gcry_sm4_aesni_avx_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx_ctr_enc,.-_gcry_sm4_aesni_avx_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_cbc_dec
ELF(.type _gcry_sm4_aesni_avx_cbc_dec,@function;)
_gcry_sm4_aesni_avx_cbc_dec:
@@ -635,7 +635,7 @@ _gcry_sm4_aesni_avx_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx_cbc_dec,.-_gcry_sm4_aesni_avx_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_cfb_dec
ELF(.type _gcry_sm4_aesni_avx_cfb_dec,@function;)
_gcry_sm4_aesni_avx_cfb_dec:
@@ -687,7 +687,7 @@ _gcry_sm4_aesni_avx_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx_cfb_dec,.-_gcry_sm4_aesni_avx_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_ocb_enc
ELF(.type _gcry_sm4_aesni_avx_ocb_enc,@function;)
@@ -786,7 +786,7 @@ _gcry_sm4_aesni_avx_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx_ocb_enc,.-_gcry_sm4_aesni_avx_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_ocb_dec
ELF(.type _gcry_sm4_aesni_avx_ocb_dec,@function;)
@@ -895,7 +895,7 @@ _gcry_sm4_aesni_avx_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx_ocb_dec,.-_gcry_sm4_aesni_avx_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx_ocb_auth
ELF(.type _gcry_sm4_aesni_avx_ocb_auth,@function;)
diff --git a/cipher/sm4-aesni-avx2-amd64.S b/cipher/sm4-aesni-avx2-amd64.S
index e09fed8f..514a0b4e 100644
--- a/cipher/sm4-aesni-avx2-amd64.S
+++ b/cipher/sm4-aesni-avx2-amd64.S
@@ -173,7 +173,7 @@
.L0f0f0f0f:
.long 0x0f0f0f0f
-.align 8
+.align 16
ELF(.type __sm4_crypt_blk16,@function;)
__sm4_crypt_blk16:
/* input:
@@ -288,7 +288,7 @@ __sm4_crypt_blk16:
CFI_ENDPROC();
ELF(.size __sm4_crypt_blk16,.-__sm4_crypt_blk16;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx2_crypt_blk1_16
ELF(.type _gcry_sm4_aesni_avx2_crypt_blk1_16,@function;)
_gcry_sm4_aesni_avx2_crypt_blk1_16:
@@ -354,7 +354,7 @@ ELF(.size _gcry_sm4_aesni_avx2_crypt_blk1_16,.-_gcry_sm4_aesni_avx2_crypt_blk1_1
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx2_ctr_enc
ELF(.type _gcry_sm4_aesni_avx2_ctr_enc,@function;)
_gcry_sm4_aesni_avx2_ctr_enc:
@@ -464,7 +464,7 @@ _gcry_sm4_aesni_avx2_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx2_ctr_enc,.-_gcry_sm4_aesni_avx2_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx2_cbc_dec
ELF(.type _gcry_sm4_aesni_avx2_cbc_dec,@function;)
_gcry_sm4_aesni_avx2_cbc_dec:
@@ -515,7 +515,7 @@ _gcry_sm4_aesni_avx2_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx2_cbc_dec,.-_gcry_sm4_aesni_avx2_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx2_cfb_dec
ELF(.type _gcry_sm4_aesni_avx2_cfb_dec,@function;)
_gcry_sm4_aesni_avx2_cfb_dec:
@@ -568,7 +568,7 @@ _gcry_sm4_aesni_avx2_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx2_cfb_dec,.-_gcry_sm4_aesni_avx2_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx2_ocb_enc
ELF(.type _gcry_sm4_aesni_avx2_ocb_enc,@function;)
@@ -680,7 +680,7 @@ _gcry_sm4_aesni_avx2_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx2_ocb_enc,.-_gcry_sm4_aesni_avx2_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx2_ocb_dec
ELF(.type _gcry_sm4_aesni_avx2_ocb_dec,@function;)
@@ -802,7 +802,7 @@ _gcry_sm4_aesni_avx2_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_aesni_avx2_ocb_dec,.-_gcry_sm4_aesni_avx2_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_aesni_avx2_ocb_auth
ELF(.type _gcry_sm4_aesni_avx2_ocb_auth,@function;)
diff --git a/cipher/sm4-gfni-avx2-amd64.S b/cipher/sm4-gfni-avx2-amd64.S
index 4ec0ea39..e21bd93b 100644
--- a/cipher/sm4-gfni-avx2-amd64.S
+++ b/cipher/sm4-gfni-avx2-amd64.S
@@ -133,7 +133,7 @@
.Lbswap32_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_expand_key
ELF(.type _gcry_sm4_gfni_avx2_expand_key,@function;)
_gcry_sm4_gfni_avx2_expand_key:
@@ -216,7 +216,7 @@ _gcry_sm4_gfni_avx2_expand_key:
CFI_ENDPROC();
ELF(.size _gcry_sm4_gfni_avx2_expand_key,.-_gcry_sm4_gfni_avx2_expand_key;)
-.align 8
+.align 16
ELF(.type sm4_gfni_avx2_crypt_blk1_4,@function;)
sm4_gfni_avx2_crypt_blk1_4:
/* input:
@@ -314,7 +314,7 @@ sm4_gfni_avx2_crypt_blk1_4:
CFI_ENDPROC();
ELF(.size sm4_gfni_avx2_crypt_blk1_4,.-sm4_gfni_avx2_crypt_blk1_4;)
-.align 8
+.align 16
ELF(.type __sm4_gfni_crypt_blk8,@function;)
__sm4_gfni_crypt_blk8:
/* input:
@@ -415,7 +415,7 @@ __sm4_gfni_crypt_blk8:
CFI_ENDPROC();
ELF(.size __sm4_gfni_crypt_blk8,.-__sm4_gfni_crypt_blk8;)
-.align 8
+.align 16
ELF(.type _gcry_sm4_gfni_avx2_crypt_blk1_8,@function;)
_gcry_sm4_gfni_avx2_crypt_blk1_8:
/* input:
@@ -472,7 +472,7 @@ ELF(.size _gcry_sm4_gfni_avx2_crypt_blk1_8,.-_gcry_sm4_gfni_avx2_crypt_blk1_8;)
16-way SM4 with GFNI and AVX2
**********************************************************************/
-.align 8
+.align 16
ELF(.type __sm4_gfni_crypt_blk16,@function;)
__sm4_gfni_crypt_blk16:
/* input:
@@ -573,7 +573,7 @@ __sm4_gfni_crypt_blk16:
CFI_ENDPROC();
ELF(.size __sm4_gfni_crypt_blk16,.-__sm4_gfni_crypt_blk16;)
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_crypt_blk1_16
ELF(.type _gcry_sm4_gfni_avx2_crypt_blk1_16,@function;)
_gcry_sm4_gfni_avx2_crypt_blk1_16:
@@ -641,7 +641,7 @@ ELF(.size _gcry_sm4_gfni_avx2_crypt_blk1_16,.-_gcry_sm4_gfni_avx2_crypt_blk1_16;
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_ctr_enc
ELF(.type _gcry_sm4_gfni_avx2_ctr_enc,@function;)
_gcry_sm4_gfni_avx2_ctr_enc:
@@ -751,7 +751,7 @@ _gcry_sm4_gfni_avx2_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_sm4_gfni_avx2_ctr_enc,.-_gcry_sm4_gfni_avx2_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_cbc_dec
ELF(.type _gcry_sm4_gfni_avx2_cbc_dec,@function;)
_gcry_sm4_gfni_avx2_cbc_dec:
@@ -802,7 +802,7 @@ _gcry_sm4_gfni_avx2_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_gfni_avx2_cbc_dec,.-_gcry_sm4_gfni_avx2_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_cfb_dec
ELF(.type _gcry_sm4_gfni_avx2_cfb_dec,@function;)
_gcry_sm4_gfni_avx2_cfb_dec:
@@ -855,7 +855,7 @@ _gcry_sm4_gfni_avx2_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_gfni_avx2_cfb_dec,.-_gcry_sm4_gfni_avx2_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_ocb_enc
ELF(.type _gcry_sm4_gfni_avx2_ocb_enc,@function;)
@@ -967,7 +967,7 @@ _gcry_sm4_gfni_avx2_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_sm4_gfni_avx2_ocb_enc,.-_gcry_sm4_gfni_avx2_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_ocb_dec
ELF(.type _gcry_sm4_gfni_avx2_ocb_dec,@function;)
@@ -1089,7 +1089,7 @@ _gcry_sm4_gfni_avx2_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_sm4_gfni_avx2_ocb_dec,.-_gcry_sm4_gfni_avx2_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_sm4_gfni_avx2_ocb_auth
ELF(.type _gcry_sm4_gfni_avx2_ocb_auth,@function;)
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index 8998d296..913b252d 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -161,7 +161,7 @@
xorl (w + 4 * (m))(CTX), x; \
movl x, (4 * (n))(out);
-.align 8
+.align 16
.globl _gcry_twofish_amd64_encrypt_block
ELF(.type _gcry_twofish_amd64_encrypt_block,@function;)
@@ -215,7 +215,7 @@ _gcry_twofish_amd64_encrypt_block:
CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_decrypt_block
ELF(.type _gcry_twofish_amd64_decrypt_block,@function;)
@@ -486,7 +486,7 @@ ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
rorq $32, RAB2; \
outunpack3(RAB, 2);
-.align 8
+.align 16
ELF(.type __twofish_enc_blk3,@function;)
__twofish_enc_blk3:
@@ -515,7 +515,7 @@ __twofish_enc_blk3:
CFI_ENDPROC();
ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
-.align 8
+.align 16
ELF(.type __twofish_dec_blk3,@function;)
__twofish_dec_blk3:
@@ -544,7 +544,7 @@ __twofish_dec_blk3:
CFI_ENDPROC();
ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_blk3
ELF(.type _gcry_twofish_amd64_blk3,@function;)
_gcry_twofish_amd64_blk3:
@@ -618,7 +618,7 @@ _gcry_twofish_amd64_blk3:
CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_blk3,.-_gcry_twofish_amd64_blk3;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_ctr_enc
ELF(.type _gcry_twofish_amd64_ctr_enc,@function;)
_gcry_twofish_amd64_ctr_enc:
@@ -719,7 +719,7 @@ _gcry_twofish_amd64_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_cbc_dec
ELF(.type _gcry_twofish_amd64_cbc_dec,@function;)
_gcry_twofish_amd64_cbc_dec:
@@ -804,7 +804,7 @@ _gcry_twofish_amd64_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_cfb_dec
ELF(.type _gcry_twofish_amd64_cfb_dec,@function;)
_gcry_twofish_amd64_cfb_dec:
@@ -889,7 +889,7 @@ _gcry_twofish_amd64_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_ocb_enc
ELF(.type _gcry_twofish_amd64_ocb_enc,@function;)
_gcry_twofish_amd64_ocb_enc:
@@ -1015,7 +1015,7 @@ _gcry_twofish_amd64_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_ocb_dec
ELF(.type _gcry_twofish_amd64_ocb_dec,@function;)
_gcry_twofish_amd64_ocb_dec:
@@ -1149,7 +1149,7 @@ _gcry_twofish_amd64_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_twofish_amd64_ocb_auth
ELF(.type _gcry_twofish_amd64_ocb_auth,@function;)
_gcry_twofish_amd64_ocb_auth:
diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S
index 0cb9a64c..6c6729c0 100644
--- a/cipher/twofish-avx2-amd64.S
+++ b/cipher/twofish-avx2-amd64.S
@@ -402,7 +402,7 @@
outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
-.align 8
+.align 16
ELF(.type __twofish_enc_blk16,@function;)
__twofish_enc_blk16:
/* input:
@@ -435,7 +435,7 @@ __twofish_enc_blk16:
CFI_ENDPROC();
ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;)
-.align 8
+.align 16
ELF(.type __twofish_dec_blk16,@function;)
__twofish_dec_blk16:
/* input:
@@ -468,7 +468,7 @@ __twofish_dec_blk16:
CFI_ENDPROC();
ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;)
-.align 8
+.align 16
.globl _gcry_twofish_avx2_blk16
ELF(.type _gcry_twofish_avx2_blk16,@function;)
_gcry_twofish_avx2_blk16:
@@ -520,7 +520,7 @@ ELF(.size _gcry_twofish_avx2_blk16,.-_gcry_twofish_avx2_blk16;)
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-.align 8
+.align 16
.globl _gcry_twofish_avx2_ctr_enc
ELF(.type _gcry_twofish_avx2_ctr_enc,@function;)
_gcry_twofish_avx2_ctr_enc:
@@ -632,7 +632,7 @@ _gcry_twofish_avx2_ctr_enc:
CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;)
-.align 8
+.align 16
.globl _gcry_twofish_avx2_cbc_dec
ELF(.type _gcry_twofish_avx2_cbc_dec,@function;)
_gcry_twofish_avx2_cbc_dec:
@@ -685,7 +685,7 @@ _gcry_twofish_avx2_cbc_dec:
CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;)
-.align 8
+.align 16
.globl _gcry_twofish_avx2_cfb_dec
ELF(.type _gcry_twofish_avx2_cfb_dec,@function;)
_gcry_twofish_avx2_cfb_dec:
@@ -740,7 +740,7 @@ _gcry_twofish_avx2_cfb_dec:
CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;)
-.align 8
+.align 16
.globl _gcry_twofish_avx2_ocb_enc
ELF(.type _gcry_twofish_avx2_ocb_enc,@function;)
@@ -854,7 +854,7 @@ _gcry_twofish_avx2_ocb_enc:
CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;)
-.align 8
+.align 16
.globl _gcry_twofish_avx2_ocb_dec
ELF(.type _gcry_twofish_avx2_ocb_dec,@function;)
@@ -979,7 +979,7 @@ _gcry_twofish_avx2_ocb_dec:
CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;)
-.align 8
+.align 16
.globl _gcry_twofish_avx2_ocb_auth
ELF(.type _gcry_twofish_avx2_ocb_auth,@function;)