Diffstat (limited to 'cipher')
-rw-r--r--  cipher/arcfour-amd64.S                6
-rw-r--r--  cipher/asm-common-amd64.h           101
-rw-r--r--  cipher/blake2b-amd64-avx2.S           2
-rw-r--r--  cipher/blake2s-amd64-avx.S            2
-rw-r--r--  cipher/blowfish-amd64.S              46
-rw-r--r--  cipher/camellia-aesni-avx-amd64.S   230
-rw-r--r--  cipher/camellia-aesni-avx2-amd64.S  164
-rw-r--r--  cipher/cast5-amd64.S                 80
-rw-r--r--  cipher/chacha20-amd64-avx2.S         20
-rw-r--r--  cipher/chacha20-amd64-ssse3.S        38
-rw-r--r--  cipher/des-amd64.S                   74
-rw-r--r--  cipher/rijndael-amd64.S              26
-rw-r--r--  cipher/rijndael-ssse3-amd64-asm.S    10
-rw-r--r--  cipher/salsa20-amd64.S               27
-rw-r--r--  cipher/serpent-avx2-amd64.S          66
-rw-r--r--  cipher/serpent-sse2-amd64.S          64
-rw-r--r--  cipher/sha1-avx-amd64.S              30
-rw-r--r--  cipher/sha1-avx-bmi2-amd64.S         34
-rw-r--r--  cipher/sha1-avx2-bmi2-amd64.S        34
-rw-r--r--  cipher/sha1-ssse3-amd64.S            30
-rw-r--r--  cipher/sha256-avx-amd64.S            26
-rw-r--r--  cipher/sha256-avx2-bmi2-amd64.S      29
-rw-r--r--  cipher/sha256-ssse3-amd64.S          26
-rw-r--r--  cipher/sha512-avx-amd64.S            26
-rw-r--r--  cipher/sha512-avx2-bmi2-amd64.S      30
-rw-r--r--  cipher/sha512-ssse3-amd64.S          26
-rw-r--r--  cipher/twofish-amd64.S              118
-rw-r--r--  cipher/twofish-avx2-amd64.S          66
-rw-r--r--  cipher/whirlpool-sse2-amd64.S        28
29 files changed, 1109 insertions, 350 deletions
diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index c08f3453..221dfeff 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
@@ -25,9 +25,12 @@
.globl _gcry_arcfour_amd64
ELF(.type _gcry_arcfour_amd64,@function)
_gcry_arcfour_amd64:
+ CFI_STARTPROC()
ENTER_SYSV_FUNC_PARAMS_0_4
push %rbp
+ CFI_PUSH(%rbp)
push %rbx
+ CFI_PUSH(%rbx)
mov %rdi, %rbp # key = ARG(key)
mov %rsi, %rbx # rbx = ARG(len)
mov %rdx, %rsi # in = ARG(in)
@@ -92,9 +95,12 @@ _gcry_arcfour_amd64:
movb %cl, (4*256)(%rbp) # key->y = y
movb %dl, (4*256+4)(%rbp) # key->x = x
pop %rbx
+ CFI_POP(%rbx)
pop %rbp
+ CFI_POP(%rbp)
EXIT_SYSV_FUNC
ret
+ CFI_ENDPROC()
.L__gcry_arcfour_amd64_end:
ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
diff --git a/cipher/asm-common-amd64.h b/cipher/asm-common-amd64.h
index 7eb42649..9d4a028a 100644
--- a/cipher/asm-common-amd64.h
+++ b/cipher/asm-common-amd64.h
@@ -41,6 +41,12 @@
# define RIP
#endif
+#ifdef __PIC__
+# define ADD_RIP +rip
+#else
+# define ADD_RIP
+#endif
+
#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
# define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
#else
@@ -60,10 +66,101 @@
# endif
#endif
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+/* CFI directives to emit DWARF stack unwinding information. */
+# define CFI_STARTPROC() .cfi_startproc
+# define CFI_ENDPROC() .cfi_endproc
+# define CFI_REMEMBER_STATE() .cfi_remember_state
+# define CFI_RESTORE_STATE() .cfi_restore_state
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+# define CFI_REGISTER(ro,rn) .cfi_register ro, rn
+# define CFI_RESTORE(reg) .cfi_restore reg
+
+# define CFI_PUSH(reg) \
+ CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0)
+# define CFI_POP(reg) \
+ CFI_ADJUST_CFA_OFFSET(-8); CFI_RESTORE(reg)
+# define CFI_POP_TMP_REG() \
+ CFI_ADJUST_CFA_OFFSET(-8);
+# define CFI_LEAVE() \
+ CFI_ADJUST_CFA_OFFSET(-8); CFI_DEF_CFA_REGISTER(%rsp)
+
+/* CFA expressions are used for pointing CFA and registers to
+ * %rsp relative offsets. */
+# define DW_REGNO_rax 0
+# define DW_REGNO_rdx 1
+# define DW_REGNO_rcx 2
+# define DW_REGNO_rbx 3
+# define DW_REGNO_rsi 4
+# define DW_REGNO_rdi 5
+# define DW_REGNO_rbp 6
+# define DW_REGNO_rsp 7
+# define DW_REGNO_r8 8
+# define DW_REGNO_r9 9
+# define DW_REGNO_r10 10
+# define DW_REGNO_r11 11
+# define DW_REGNO_r12 12
+# define DW_REGNO_r13 13
+# define DW_REGNO_r14 14
+# define DW_REGNO_r15 15
+
+# define DW_REGNO(reg) DW_REGNO_ ## reg
+
+/* Fixed length encoding used for integers for now. */
+# define DW_SLEB128_7BIT(value) \
+ 0x00|((value) & 0x7f)
+# define DW_SLEB128_28BIT(value) \
+ 0x80|((value)&0x7f), \
+ 0x80|(((value)>>7)&0x7f), \
+ 0x80|(((value)>>14)&0x7f), \
+ 0x00|(((value)>>21)&0x7f)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \
+ .cfi_escape \
+ 0x0f, /* DW_CFA_def_cfa_expression */ \
+ DW_SLEB128_7BIT(11), /* length */ \
+ 0x77, /* DW_OP_breg7, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs), \
+ 0x06, /* DW_OP_deref */ \
+ 0x23, /* DW_OP_plus_constu */ \
+ DW_SLEB128_28BIT((cfa_depth)+8)
+
+# define CFI_REG_ON_STACK(reg,rsp_offs) \
+ .cfi_escape \
+ 0x10, /* DW_CFA_expression */ \
+ DW_SLEB128_7BIT(DW_REGNO(reg)), \
+ DW_SLEB128_7BIT(5), /* length */ \
+ 0x77, /* DW_OP_breg7, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs)
+
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_REMEMBER_STATE()
+# define CFI_RESTORE_STATE()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_REGISTER(ro,rn)
+# define CFI_RESTORE(reg)
+
+# define CFI_PUSH(reg)
+# define CFI_POP(reg)
+# define CFI_POP_TMP_REG()
+# define CFI_LEAVE()
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth)
+# define CFI_REG_ON_STACK(reg,rsp_offs)
+#endif
+
#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
# define ENTER_SYSV_FUNC_PARAMS_0_4 \
pushq %rdi; \
+ CFI_PUSH(%rdi); \
pushq %rsi; \
+ CFI_PUSH(%rsi); \
movq %rcx, %rdi; \
movq %rdx, %rsi; \
movq %r8, %rdx; \
@@ -79,7 +176,9 @@
# define EXIT_SYSV_FUNC \
popq %rsi; \
- popq %rdi;
+ CFI_POP(%rsi); \
+ popq %rdi; \
+ CFI_POP(%rdi);
#else
# define ENTER_SYSV_FUNC_PARAMS_0_4
# define ENTER_SYSV_FUNC_PARAMS_5
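The macros above are meant to sit next to the existing push/pop and stack-adjust instructions without changing code generation: when HAVE_GCC_ASM_CFI_DIRECTIVES is unset, every CFI_* line expands to nothing. As a minimal sketch of the intended usage (the function name is hypothetical and this block is not part of the patch), a SysV leaf routine that clobbers one callee-saved register would be annotated like this:

	.align 8
	.globl _example_cfi_leaf
	ELF(.type _example_cfi_leaf,@function)
_example_cfi_leaf:
	CFI_STARTPROC();		/* expands to .cfi_startproc */
	pushq %rbx;
	CFI_PUSH(%rbx);			/* CFA offset +8; %rbx recorded as saved at 0(%rsp) */
	movq %rdi, %rbx;
	/* ... work on the state pointed to by %rbx ... */
	popq %rbx;
	CFI_POP(%rbx);			/* CFA offset -8; %rbx marked as restored */
	ret;
	CFI_ENDPROC();			/* expands to .cfi_endproc */
ELF(.size _example_cfi_leaf,.-_example_cfi_leaf;)

With the directives expanded, an unwinder that interrupts the function anywhere between the push and the pop can still locate the return address and the caller's %rbx.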
diff --git a/cipher/blake2b-amd64-avx2.S b/cipher/blake2b-amd64-avx2.S
index 6bcc5652..08c816cd 100644
--- a/cipher/blake2b-amd64-avx2.S
+++ b/cipher/blake2b-amd64-avx2.S
@@ -207,6 +207,7 @@ _gcry_blake2b_transform_amd64_avx2:
* %rsi: blks
* %rdx: num_blks
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -291,6 +292,7 @@ _gcry_blake2b_transform_amd64_avx2:
xor %eax, %eax;
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blake2b_transform_amd64_avx2,
.-_gcry_blake2b_transform_amd64_avx2;)
diff --git a/cipher/blake2s-amd64-avx.S b/cipher/blake2s-amd64-avx.S
index f7312dbd..19837326 100644
--- a/cipher/blake2s-amd64-avx.S
+++ b/cipher/blake2s-amd64-avx.S
@@ -191,6 +191,7 @@ _gcry_blake2s_transform_amd64_avx:
* %rsi: blks
* %rdx: num_blks
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -269,6 +270,7 @@ _gcry_blake2s_transform_amd64_avx:
xor %eax, %eax;
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blake2s_transform_amd64_avx,
.-_gcry_blake2s_transform_amd64_avx;)
diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 02d3b710..bdb361d7 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -133,7 +133,9 @@ __blowfish_enc_blk1:
* output:
* RX0: output plaintext block
*/
+ CFI_STARTPROC();
movq %rbp, %r11;
+ CFI_REGISTER(%rbp, %r11);
load_roundkey_enc(0);
round_enc(2);
@@ -147,8 +149,10 @@ __blowfish_enc_blk1:
add_roundkey_enc();
movq %r11, %rbp;
+ CFI_RESTORE(%rbp)
ret;
+ CFI_ENDPROC();
ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
.align 8
@@ -161,6 +165,7 @@ _gcry_blowfish_amd64_do_encrypt:
* %rsi: u32 *ret_xl
* %rdx: u32 *ret_xr
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
movl (%rdx), RX0d;
@@ -178,6 +183,7 @@ _gcry_blowfish_amd64_do_encrypt:
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
.align 8
@@ -190,6 +196,7 @@ _gcry_blowfish_amd64_encrypt_block:
* %rsi: dst
* %rdx: src
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
movq %rsi, %r10;
@@ -204,6 +211,7 @@ _gcry_blowfish_amd64_encrypt_block:
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
.align 8
@@ -216,9 +224,11 @@ _gcry_blowfish_amd64_decrypt_block:
* %rsi: dst
* %rdx: src
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
movq %rbp, %r11;
+ CFI_REGISTER(%rbp, %r11);
movq %rsi, %r10;
movq %rdx, RIO;
@@ -240,9 +250,11 @@ _gcry_blowfish_amd64_decrypt_block:
write_block();
movq %r11, %rbp;
+ CFI_RESTORE(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
/**********************************************************************
@@ -340,6 +352,7 @@ __blowfish_enc_blk4:
* output:
* RX0,RX1,RX2,RX3: four output ciphertext blocks
*/
+ CFI_STARTPROC();
preload_roundkey_enc(0);
round_enc4(0);
@@ -355,6 +368,7 @@ __blowfish_enc_blk4:
outbswap_block4();
ret;
+ CFI_ENDPROC();
ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
.align 8
@@ -367,6 +381,7 @@ __blowfish_dec_blk4:
* output:
* RX0,RX1,RX2,RX3: four output plaintext blocks
*/
+ CFI_STARTPROC();
preload_roundkey_dec(17);
inbswap_block4();
@@ -384,6 +399,7 @@ __blowfish_dec_blk4:
outbswap_block4();
ret;
+ CFI_ENDPROC();
ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
.align 8
@@ -396,12 +412,17 @@ _gcry_blowfish_amd64_ctr_enc:
* %rdx: src (4 blocks)
* %rcx: iv (big endian, 64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
/* %r11-%r13 are not used by __blowfish_enc_blk4 */
movq %rcx, %r13; /*iv*/
@@ -438,12 +459,17 @@ _gcry_blowfish_amd64_ctr_enc:
movq RX3, 3 * 8(%r11);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
.align 8
@@ -456,12 +482,17 @@ _gcry_blowfish_amd64_cbc_dec:
* %rdx: src (4 blocks)
* %rcx: iv (64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
/* %r11-%r13 are not used by __blowfish_dec_blk4 */
movq %rsi, %r11; /*dst*/
@@ -489,12 +520,17 @@ _gcry_blowfish_amd64_cbc_dec:
movq RX3, 3 * 8(%r11);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
.align 8
@@ -507,12 +543,17 @@ _gcry_blowfish_amd64_cfb_dec:
* %rdx: src (4 blocks)
* %rcx: iv (64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
/* %r11-%r13 are not used by __blowfish_enc_blk4 */
movq %rcx, %r13; /*iv*/
@@ -543,12 +584,17 @@ _gcry_blowfish_amd64_cfb_dec:
movq RX3, 3 * 8(%r11);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
#endif /*defined(USE_BLOWFISH)*/
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 8022934f..e16d4f61 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -24,17 +24,7 @@
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -75,10 +65,10 @@
/* \
* S-function with AES subbytes \
*/ \
- vmovdqa .Linv_shift_row RIP, t4; \
- vbroadcastss .L0f0f0f0f RIP, t7; \
- vmovdqa .Lpre_tf_lo_s1 RIP, t0; \
- vmovdqa .Lpre_tf_hi_s1 RIP, t1; \
+ vmovdqa .Linv_shift_row rRIP, t4; \
+ vbroadcastss .L0f0f0f0f rRIP, t7; \
+ vmovdqa .Lpre_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpre_tf_hi_s1 rRIP, t1; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
@@ -91,8 +81,8 @@
vpshufb t4, x6, x6; \
\
/* prefilter sboxes 1, 2 and 3 */ \
- vmovdqa .Lpre_tf_lo_s4 RIP, t2; \
- vmovdqa .Lpre_tf_hi_s4 RIP, t3; \
+ vmovdqa .Lpre_tf_lo_s4 rRIP, t2; \
+ vmovdqa .Lpre_tf_hi_s4 rRIP, t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x1, t0, t1, t7, t6); \
@@ -106,8 +96,8 @@
filter_8bit(x6, t2, t3, t7, t6); \
\
/* AES subbytes + AES shift rows */ \
- vmovdqa .Lpost_tf_lo_s1 RIP, t0; \
- vmovdqa .Lpost_tf_hi_s1 RIP, t1; \
+ vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
vaesenclast t4, x0, x0; \
vaesenclast t4, x7, x7; \
vaesenclast t4, x1, x1; \
@@ -118,16 +108,16 @@
vaesenclast t4, x6, x6; \
\
/* postfilter sboxes 1 and 4 */ \
- vmovdqa .Lpost_tf_lo_s3 RIP, t2; \
- vmovdqa .Lpost_tf_hi_s3 RIP, t3; \
+ vmovdqa .Lpost_tf_lo_s3 rRIP, t2; \
+ vmovdqa .Lpost_tf_hi_s3 rRIP, t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x3, t0, t1, t7, t6); \
filter_8bit(x6, t0, t1, t7, t6); \
\
/* postfilter sbox 3 */ \
- vmovdqa .Lpost_tf_lo_s2 RIP, t4; \
- vmovdqa .Lpost_tf_hi_s2 RIP, t5; \
+ vmovdqa .Lpost_tf_lo_s2 rRIP, t4; \
+ vmovdqa .Lpost_tf_hi_s2 rRIP, t5; \
filter_8bit(x2, t2, t3, t7, t6); \
filter_8bit(x5, t2, t3, t7, t6); \
\
@@ -442,7 +432,7 @@
transpose_4x4(c0, c1, c2, c3, a0, a1); \
transpose_4x4(d0, d1, d2, d3, a0, a1); \
\
- vmovdqu .Lshufb_16x16b RIP, a0; \
+ vmovdqu .Lshufb_16x16b rRIP, a0; \
vmovdqu st1, a1; \
vpshufb a0, a2, a2; \
vpshufb a0, a3, a3; \
@@ -508,7 +498,7 @@
vpunpcklwd t1, t3, e; \
vpunpckhwd t1, t3, f; \
\
- vmovdqa .Ltranspose_8x8_shuf RIP, t3; \
+ vmovdqa .Ltranspose_8x8_shuf rRIP, t3; \
\
vpunpcklwd g, c, d; \
vpunpckhwd g, c, c; \
@@ -540,7 +530,7 @@
#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, rio, key) \
vmovq key, x0; \
- vpshufb .Lpack_bswap RIP, x0, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
\
vpxor 0 * 16(rio), x0, y7; \
vpxor 1 * 16(rio), x0, y6; \
@@ -591,7 +581,7 @@
vmovdqu x0, stack_tmp0; \
\
vmovq key, x0; \
- vpshufb .Lpack_bswap RIP, x0, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
\
vpxor x0, y7, y7; \
vpxor x0, y6, y6; \
@@ -786,6 +776,7 @@ __camellia_enc_blk16:
* %xmm0..%xmm15: 16 encrypted blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/
+ CFI_STARTPROC();
leaq 8 * 16(%rax), %rcx;
@@ -859,6 +850,7 @@ __camellia_enc_blk16:
%xmm15, %rax, %rcx, 24);
jmp .Lenc_done;
+ CFI_ENDPROC();
ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
.align 8
@@ -874,6 +866,7 @@ __camellia_dec_blk16:
* %xmm0..%xmm15: 16 plaintext blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/
+ CFI_STARTPROC();
leaq 8 * 16(%rax), %rcx;
@@ -944,6 +937,7 @@ __camellia_dec_blk16:
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
+ CFI_ENDPROC();
ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
#define inc_le128(x, minus_one, tmp) \
@@ -963,9 +957,12 @@ _gcry_camellia_aesni_avx_ctr_enc:
* %rdx: src (16 blocks)
* %rcx: iv (big endian, 128bit)
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -973,7 +970,7 @@ _gcry_camellia_aesni_avx_ctr_enc:
andq $~31, %rsp;
movq %rsp, %rax;
- vmovdqa .Lbswap128_mask RIP, %xmm14;
+ vmovdqa .Lbswap128_mask rRIP, %xmm14;
/* load IV and byteswap */
vmovdqu (%rcx), %xmm15;
@@ -1018,12 +1015,12 @@ _gcry_camellia_aesni_avx_ctr_enc:
vmovdqa %xmm0, %xmm13;
vpshufb %xmm14, %xmm0, %xmm0;
inc_le128(%xmm13, %xmm15, %xmm14);
- vpshufb .Lbswap128_mask RIP, %xmm13, %xmm13; /* le => be */
+ vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; /* le => be */
vmovdqu %xmm13, (%rcx);
/* inpack16_pre: */
vmovq (key_table)(CTX), %xmm15;
- vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
vpxor %xmm0, %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
@@ -1067,7 +1064,9 @@ _gcry_camellia_aesni_avx_ctr_enc:
vzeroall;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
.align 8
@@ -1081,9 +1080,12 @@ _gcry_camellia_aesni_avx_cbc_dec:
* %rdx: src (16 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1135,7 +1137,9 @@ _gcry_camellia_aesni_avx_cbc_dec:
vzeroall;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
.align 8
@@ -1149,9 +1153,12 @@ _gcry_camellia_aesni_avx_cfb_dec:
* %rdx: src (16 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1161,7 +1168,7 @@ _gcry_camellia_aesni_avx_cfb_dec:
/* inpack16_pre: */
vmovq (key_table)(CTX), %xmm0;
- vpshufb .Lpack_bswap RIP, %xmm0, %xmm0;
+ vpshufb .Lpack_bswap rRIP, %xmm0, %xmm0;
vpxor (%rcx), %xmm0, %xmm15;
vmovdqu 15 * 16(%rdx), %xmm1;
vmovdqu %xmm1, (%rcx); /* store new IV */
@@ -1207,7 +1214,9 @@ _gcry_camellia_aesni_avx_cfb_dec:
vzeroall;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
.align 8
@@ -1223,9 +1232,12 @@ _gcry_camellia_aesni_avx_ocb_enc:
* %r8 : checksum
* %r9 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1233,10 +1245,14 @@ _gcry_camellia_aesni_avx_ocb_enc:
andq $~31, %rsp;
movq %rsp, %rax;
- movq %r10, (16 * 16 + 0 * 8)(%rax);
- movq %r11, (16 * 16 + 1 * 8)(%rax);
- movq %r12, (16 * 16 + 2 * 8)(%rax);
- movq %r13, (16 * 16 + 3 * 8)(%rax);
+ movq %r10, (16 * 16 + 0 * 8)(%rsp);
+ movq %r11, (16 * 16 + 1 * 8)(%rsp);
+ movq %r12, (16 * 16 + 2 * 8)(%rsp);
+ movq %r13, (16 * 16 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
vmovdqu (%rcx), %xmm14;
vmovdqu (%r8), %xmm15;
@@ -1292,7 +1308,7 @@ _gcry_camellia_aesni_avx_ocb_enc:
/* inpack16_pre: */
vmovq (key_table)(CTX), %xmm15;
- vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
vpxor %xmm0, %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
@@ -1335,13 +1351,19 @@ _gcry_camellia_aesni_avx_ocb_enc:
vzeroall;
- movq (16 * 16 + 0 * 8)(%rax), %r10;
- movq (16 * 16 + 1 * 8)(%rax), %r11;
- movq (16 * 16 + 2 * 8)(%rax), %r12;
- movq (16 * 16 + 3 * 8)(%rax), %r13;
+ movq (16 * 16 + 0 * 8)(%rsp), %r10;
+ movq (16 * 16 + 1 * 8)(%rsp), %r11;
+ movq (16 * 16 + 2 * 8)(%rsp), %r12;
+ movq (16 * 16 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;)
.align 8
@@ -1357,9 +1379,12 @@ _gcry_camellia_aesni_avx_ocb_dec:
* %r8 : checksum
* %r9 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1367,10 +1392,14 @@ _gcry_camellia_aesni_avx_ocb_dec:
andq $~31, %rsp;
movq %rsp, %rax;
- movq %r10, (16 * 16 + 0 * 8)(%rax);
- movq %r11, (16 * 16 + 1 * 8)(%rax);
- movq %r12, (16 * 16 + 2 * 8)(%rax);
- movq %r13, (16 * 16 + 3 * 8)(%rax);
+ movq %r10, (16 * 16 + 0 * 8)(%rsp);
+ movq %r11, (16 * 16 + 1 * 8)(%rsp);
+ movq %r12, (16 * 16 + 2 * 8)(%rsp);
+ movq %r13, (16 * 16 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
vmovdqu (%rcx), %xmm15;
@@ -1428,7 +1457,7 @@ _gcry_camellia_aesni_avx_ocb_dec:
/* inpack16_pre: */
vmovq (key_table)(CTX, %r8, 8), %xmm15;
- vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
vpxor %xmm0, %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
@@ -1493,13 +1522,19 @@ _gcry_camellia_aesni_avx_ocb_dec:
vzeroall;
- movq (16 * 16 + 0 * 8)(%rax), %r10;
- movq (16 * 16 + 1 * 8)(%rax), %r11;
- movq (16 * 16 + 2 * 8)(%rax), %r12;
- movq (16 * 16 + 3 * 8)(%rax), %r13;
+ movq (16 * 16 + 0 * 8)(%rsp), %r10;
+ movq (16 * 16 + 1 * 8)(%rsp), %r11;
+ movq (16 * 16 + 2 * 8)(%rsp), %r12;
+ movq (16 * 16 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;)
.align 8
@@ -1514,9 +1549,12 @@ _gcry_camellia_aesni_avx_ocb_auth:
* %rcx: checksum
* %r8 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1524,10 +1562,14 @@ _gcry_camellia_aesni_avx_ocb_auth:
andq $~31, %rsp;
movq %rsp, %rax;
- movq %r10, (16 * 16 + 0 * 8)(%rax);
- movq %r11, (16 * 16 + 1 * 8)(%rax);
- movq %r12, (16 * 16 + 2 * 8)(%rax);
- movq %r13, (16 * 16 + 3 * 8)(%rax);
+ movq %r10, (16 * 16 + 0 * 8)(%rsp);
+ movq %r11, (16 * 16 + 1 * 8)(%rsp);
+ movq %r12, (16 * 16 + 2 * 8)(%rsp);
+ movq %r13, (16 * 16 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
vmovdqu (%rdx), %xmm15;
@@ -1580,7 +1622,7 @@ _gcry_camellia_aesni_avx_ocb_auth:
/* inpack16_pre: */
vmovq (key_table)(CTX), %xmm15;
- vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
vpxor %xmm0, %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
@@ -1623,13 +1665,19 @@ _gcry_camellia_aesni_avx_ocb_auth:
vzeroall;
- movq (16 * 16 + 0 * 8)(%rax), %r10;
- movq (16 * 16 + 1 * 8)(%rax), %r11;
- movq (16 * 16 + 2 * 8)(%rax), %r12;
- movq (16 * 16 + 3 * 8)(%rax), %r13;
+ movq (16 * 16 + 0 * 8)(%rsp), %r10;
+ movq (16 * 16 + 1 * 8)(%rsp), %r11;
+ movq (16 * 16 + 2 * 8)(%rsp), %r12;
+ movq (16 * 16 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;)
/*
@@ -1657,8 +1705,8 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
vpand sbox4mask, t0, t0; \
vpor t0, x, x; \
\
- vmovdqa .Lpost_tf_lo_s1 RIP, t0; \
- vmovdqa .Lpost_tf_hi_s1 RIP, t1; \
+ vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
\
/* prefilter sboxes */ \
filter_8bit(x, pre_s1lo_mask, pre_s1hi_mask, _0f0f0f0fmask, t2); \
@@ -1672,18 +1720,18 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
/* output rotation for sbox2 (<<< 1) */ \
/* output rotation for sbox3 (>>> 1) */ \
vpshufb inv_shift_row, x, t1; \
- vpshufb .Lsp0044440444044404mask RIP, x, t4; \
- vpshufb .Lsp1110111010011110mask RIP, x, x; \
+ vpshufb .Lsp0044440444044404mask rRIP, x, t4; \
+ vpshufb .Lsp1110111010011110mask rRIP, x, x; \
vpaddb t1, t1, t2; \
vpsrlw $7, t1, t0; \
vpsllw $7, t1, t3; \
vpor t0, t2, t0; \
vpsrlw $1, t1, t1; \
- vpshufb .Lsp0222022222000222mask RIP, t0, t0; \
+ vpshufb .Lsp0222022222000222mask rRIP, t0, t0; \
vpor t1, t3, t1; \
\
vpxor x, t4, t4; \
- vpshufb .Lsp3033303303303033mask RIP, t1, t1; \
+ vpshufb .Lsp3033303303303033mask rRIP, t1, t1; \
vpxor t4, t0, t0; \
vpxor t1, t0, t0; \
vpsrldq $8, t0, x; \
@@ -1741,17 +1789,19 @@ __camellia_avx_setup128:
* %rdi: ctx, CTX; subkey storage at key_table(CTX)
* %xmm0: key
*/
+ CFI_STARTPROC();
+
#define cmll_sub(n, ctx) (key_table+((n)*8))(ctx)
#define KL128 %xmm0
#define KA128 %xmm2
- vpshufb .Lbswap128_mask RIP, KL128, KL128;
+ vpshufb .Lbswap128_mask rRIP, KL128, KL128;
- vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11;
- vmovq .Lsbox4_input_mask RIP, %xmm12;
- vbroadcastss .L0f0f0f0f RIP, %xmm13;
- vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14;
- vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15;
+ vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+ vmovq .Lsbox4_input_mask rRIP, %xmm12;
+ vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+ vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+ vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
/*
* Generate KA
@@ -1763,18 +1813,18 @@ __camellia_avx_setup128:
camellia_f(%xmm2, %xmm4, %xmm1,
%xmm5, %xmm6, %xmm7, %xmm8,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
vpxor %xmm4, %xmm3, %xmm3;
camellia_f(%xmm3, %xmm2, %xmm1,
%xmm5, %xmm6, %xmm7, %xmm8,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
camellia_f(%xmm2, %xmm3, %xmm1,
%xmm5, %xmm6, %xmm7, %xmm8,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
vpxor %xmm4, %xmm3, %xmm3;
camellia_f(%xmm3, %xmm4, %xmm1,
%xmm5, %xmm6, %xmm7, %xmm8,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
vpslldq $8, %xmm3, %xmm3;
vpxor %xmm4, %xmm2, %xmm2;
@@ -2076,6 +2126,7 @@ __camellia_avx_setup128:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
.align 8
@@ -2086,19 +2137,21 @@ __camellia_avx_setup256:
* %rdi: ctx, CTX; subkey storage at key_table(CTX)
* %xmm0 & %xmm1: key
*/
+ CFI_STARTPROC();
+
#define KL128 %xmm0
#define KR128 %xmm1
#define KA128 %xmm2
#define KB128 %xmm3
- vpshufb .Lbswap128_mask RIP, KL128, KL128;
- vpshufb .Lbswap128_mask RIP, KR128, KR128;
+ vpshufb .Lbswap128_mask rRIP, KL128, KL128;
+ vpshufb .Lbswap128_mask rRIP, KR128, KR128;
- vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11;
- vmovq .Lsbox4_input_mask RIP, %xmm12;
- vbroadcastss .L0f0f0f0f RIP, %xmm13;
- vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14;
- vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15;
+ vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+ vmovq .Lsbox4_input_mask rRIP, %xmm12;
+ vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+ vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+ vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
/*
* Generate KA
@@ -2111,20 +2164,20 @@ __camellia_avx_setup256:
camellia_f(%xmm2, %xmm4, %xmm5,
%xmm7, %xmm8, %xmm9, %xmm10,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
vpxor %xmm4, %xmm3, %xmm3;
camellia_f(%xmm3, %xmm2, %xmm5,
%xmm7, %xmm8, %xmm9, %xmm10,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
vpxor %xmm6, %xmm2, %xmm2;
camellia_f(%xmm2, %xmm3, %xmm5,
%xmm7, %xmm8, %xmm9, %xmm10,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
vpxor %xmm4, %xmm3, %xmm3;
vpxor KR128, %xmm3, %xmm3;
camellia_f(%xmm3, %xmm4, %xmm5,
%xmm7, %xmm8, %xmm9, %xmm10,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
vpslldq $8, %xmm3, %xmm3;
vpxor %xmm4, %xmm2, %xmm2;
@@ -2142,12 +2195,12 @@ __camellia_avx_setup256:
camellia_f(%xmm4, %xmm5, %xmm6,
%xmm7, %xmm8, %xmm9, %xmm10,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 rRIP);
vpxor %xmm5, %xmm3, %xmm3;
camellia_f(%xmm3, %xmm5, %xmm6,
%xmm7, %xmm8, %xmm9, %xmm10,
- %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 RIP);
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 rRIP);
vpslldq $8, %xmm3, %xmm3;
vpxor %xmm5, %xmm4, %xmm4;
vpsrldq $8, %xmm3, %xmm3;
@@ -2553,6 +2606,7 @@ __camellia_avx_setup256:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
.align 8
@@ -2565,6 +2619,7 @@ _gcry_camellia_aesni_avx_keygen:
* %rsi: key
* %rdx: keylen
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -2585,6 +2640,7 @@ _gcry_camellia_aesni_avx_keygen:
vpor %xmm2, %xmm1, %xmm1;
jmp __camellia_avx_setup256;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;)
#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
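The frame used by the AVX bulk functions above differs from the plain push/pop case: after `andq $~31, %rsp` the stack pointer is no longer at a fixed distance from the CFA, so the CFA is re-anchored on %rbp with CFI_DEF_CFA_REGISTER, and the callee-saved registers spilled into the aligned scratch area are described with the DWARF expressions emitted by CFI_REG_ON_STACK (which appears to be why the stores were also switched from %rax-relative to the equivalent %rsp-relative addressing, so they line up with the %rsp-based expressions). A condensed, hypothetical sketch of that frame shape, with example sizes and offsets only:

	CFI_STARTPROC();
	pushq %rbp;
	CFI_PUSH(%rbp);
	movq %rsp, %rbp;
	CFI_DEF_CFA_REGISTER(%rbp);		/* CFA now tracked through %rbp */

	subq $(16 * 16 + 2 * 8), %rsp;
	andq $~31, %rsp;			/* %rsp offset from CFA no longer constant */

	movq %r12, (16 * 16 + 0 * 8)(%rsp);
	movq %r13, (16 * 16 + 1 * 8)(%rsp);
	CFI_REG_ON_STACK(r12, 16 * 16 + 0 * 8);	/* DW_CFA_expression: %r12 saved at %rsp + offset */
	CFI_REG_ON_STACK(r13, 16 * 16 + 1 * 8);

	/* ... bulk processing using the 32-byte aligned scratch area ... */

	movq (16 * 16 + 0 * 8)(%rsp), %r12;
	movq (16 * 16 + 1 * 8)(%rsp), %r13;
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);

	leave;
	CFI_LEAVE();				/* CFA re-anchored on %rsp, offset rewound by 8 */
	ret;
	CFI_ENDPROC();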
diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S
index 897e4aee..cc01c774 100644
--- a/cipher/camellia-aesni-avx2-amd64.S
+++ b/cipher/camellia-aesni-avx2-amd64.S
@@ -24,17 +24,7 @@
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -92,12 +82,12 @@
/* \
* S-function with AES subbytes \
*/ \
- vbroadcasti128 .Linv_shift_row RIP, t4; \
- vpbroadcastd .L0f0f0f0f RIP, t7; \
- vbroadcasti128 .Lpre_tf_lo_s1 RIP, t5; \
- vbroadcasti128 .Lpre_tf_hi_s1 RIP, t6; \
- vbroadcasti128 .Lpre_tf_lo_s4 RIP, t2; \
- vbroadcasti128 .Lpre_tf_hi_s4 RIP, t3; \
+ vbroadcasti128 .Linv_shift_row rRIP, t4; \
+ vpbroadcastd .L0f0f0f0f rRIP, t7; \
+ vbroadcasti128 .Lpre_tf_lo_s1 rRIP, t5; \
+ vbroadcasti128 .Lpre_tf_hi_s1 rRIP, t6; \
+ vbroadcasti128 .Lpre_tf_lo_s4 rRIP, t2; \
+ vbroadcasti128 .Lpre_tf_hi_s4 rRIP, t3; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
@@ -143,8 +133,8 @@
vinserti128 $1, t2##_x, x6, x6; \
vextracti128 $1, x1, t3##_x; \
vextracti128 $1, x4, t2##_x; \
- vbroadcasti128 .Lpost_tf_lo_s1 RIP, t0; \
- vbroadcasti128 .Lpost_tf_hi_s1 RIP, t1; \
+ vbroadcasti128 .Lpost_tf_lo_s1 rRIP, t0; \
+ vbroadcasti128 .Lpost_tf_hi_s1 rRIP, t1; \
vaesenclast t4##_x, x2##_x, x2##_x; \
vaesenclast t4##_x, t6##_x, t6##_x; \
vaesenclast t4##_x, x5##_x, x5##_x; \
@@ -159,16 +149,16 @@
vinserti128 $1, t2##_x, x4, x4; \
\
/* postfilter sboxes 1 and 4 */ \
- vbroadcasti128 .Lpost_tf_lo_s3 RIP, t2; \
- vbroadcasti128 .Lpost_tf_hi_s3 RIP, t3; \
+ vbroadcasti128 .Lpost_tf_lo_s3 rRIP, t2; \
+ vbroadcasti128 .Lpost_tf_hi_s3 rRIP, t3; \
filter_8bit(x0, t0, t1, t7, t4); \
filter_8bit(x7, t0, t1, t7, t4); \
filter_8bit(x3, t0, t1, t7, t6); \
filter_8bit(x6, t0, t1, t7, t6); \
\
/* postfilter sbox 3 */ \
- vbroadcasti128 .Lpost_tf_lo_s2 RIP, t4; \
- vbroadcasti128 .Lpost_tf_hi_s2 RIP, t5; \
+ vbroadcasti128 .Lpost_tf_lo_s2 rRIP, t4; \
+ vbroadcasti128 .Lpost_tf_hi_s2 rRIP, t5; \
filter_8bit(x2, t2, t3, t7, t6); \
filter_8bit(x5, t2, t3, t7, t6); \
\
@@ -485,7 +475,7 @@
transpose_4x4(c0, c1, c2, c3, a0, a1); \
transpose_4x4(d0, d1, d2, d3, a0, a1); \
\
- vbroadcasti128 .Lshufb_16x16b RIP, a0; \
+ vbroadcasti128 .Lshufb_16x16b rRIP, a0; \
vmovdqu st1, a1; \
vpshufb a0, a2, a2; \
vpshufb a0, a3, a3; \
@@ -524,7 +514,7 @@
#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, rio, key) \
vpbroadcastq key, x0; \
- vpshufb .Lpack_bswap RIP, x0, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
\
vpxor 0 * 32(rio), x0, y7; \
vpxor 1 * 32(rio), x0, y6; \
@@ -575,7 +565,7 @@
vmovdqu x0, stack_tmp0; \
\
vpbroadcastq key, x0; \
- vpshufb .Lpack_bswap RIP, x0, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
\
vpxor x0, y7, y7; \
vpxor x0, y6, y6; \
@@ -765,6 +755,7 @@ __camellia_enc_blk32:
* %ymm0..%ymm15: 32 encrypted blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/
+ CFI_STARTPROC();
leaq 8 * 32(%rax), %rcx;
@@ -838,6 +829,7 @@ __camellia_enc_blk32:
%ymm15, %rax, %rcx, 24);
jmp .Lenc_done;
+ CFI_ENDPROC();
ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;)
.align 8
@@ -853,6 +845,7 @@ __camellia_dec_blk32:
* %ymm0..%ymm15: 16 plaintext blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/
+ CFI_STARTPROC();
leaq 8 * 32(%rax), %rcx;
@@ -923,6 +916,7 @@ __camellia_dec_blk32:
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
+ CFI_ENDPROC();
ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;)
#define inc_le128(x, minus_one, tmp) \
@@ -942,9 +936,12 @@ _gcry_camellia_aesni_avx2_ctr_enc:
* %rdx: src (32 blocks)
* %rcx: iv (big endian, 128bit)
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
movq 8(%rcx), %r11;
bswapq %r11;
@@ -960,10 +957,10 @@ _gcry_camellia_aesni_avx2_ctr_enc:
/* load IV and byteswap */
vmovdqu (%rcx), %xmm0;
- vpshufb .Lbswap128_mask RIP, %xmm0, %xmm0;
+ vpshufb .Lbswap128_mask rRIP, %xmm0, %xmm0;
vmovdqa %xmm0, %xmm1;
inc_le128(%xmm0, %xmm15, %xmm14);
- vbroadcasti128 .Lbswap128_mask RIP, %ymm14;
+ vbroadcasti128 .Lbswap128_mask rRIP, %ymm14;
vinserti128 $1, %xmm0, %ymm1, %ymm0;
vpshufb %ymm14, %ymm0, %ymm13;
vmovdqu %ymm13, 15 * 32(%rax);
@@ -1064,14 +1061,14 @@ _gcry_camellia_aesni_avx2_ctr_enc:
vextracti128 $1, %ymm0, %xmm13;
vpshufb %ymm14, %ymm0, %ymm0;
inc_le128(%xmm13, %xmm15, %xmm14);
- vpshufb .Lbswap128_mask RIP, %xmm13, %xmm13;
+ vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13;
vmovdqu %xmm13, (%rcx);
.align 4
.Lload_ctr_done:
/* inpack16_pre: */
vpbroadcastq (key_table)(CTX), %ymm15;
- vpshufb .Lpack_bswap RIP, %ymm15, %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
vpxor %ymm0, %ymm15, %ymm0;
vpxor %ymm1, %ymm15, %ymm1;
vpxor %ymm2, %ymm15, %ymm2;
@@ -1116,7 +1113,9 @@ _gcry_camellia_aesni_avx2_ctr_enc:
vzeroall;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;)
.align 8
@@ -1130,9 +1129,12 @@ _gcry_camellia_aesni_avx2_cbc_dec:
* %rdx: src (32 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1188,7 +1190,9 @@ _gcry_camellia_aesni_avx2_cbc_dec:
vzeroall;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;)
.align 8
@@ -1202,9 +1206,12 @@ _gcry_camellia_aesni_avx2_cfb_dec:
* %rdx: src (32 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1214,7 +1221,7 @@ _gcry_camellia_aesni_avx2_cfb_dec:
/* inpack16_pre: */
vpbroadcastq (key_table)(CTX), %ymm0;
- vpshufb .Lpack_bswap RIP, %ymm0, %ymm0;
+ vpshufb .Lpack_bswap rRIP, %ymm0, %ymm0;
vmovdqu (%rcx), %xmm15;
vinserti128 $1, (%rdx), %ymm15, %ymm15;
vpxor %ymm15, %ymm0, %ymm15;
@@ -1262,7 +1269,9 @@ _gcry_camellia_aesni_avx2_cfb_dec:
vzeroall;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;)
.align 8
@@ -1278,9 +1287,12 @@ _gcry_camellia_aesni_avx2_ocb_enc:
* %r8 : checksum
* %r9 : L pointers (void *L[32])
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1288,10 +1300,14 @@ _gcry_camellia_aesni_avx2_ocb_enc:
andq $~63, %rsp;
movq %rsp, %rax;
- movq %r10, (16 * 32 + 0 * 8)(%rax);
- movq %r11, (16 * 32 + 1 * 8)(%rax);
- movq %r12, (16 * 32 + 2 * 8)(%rax);
- movq %r13, (16 * 32 + 3 * 8)(%rax);
+ movq %r10, (16 * 32 + 0 * 8)(%rsp);
+ movq %r11, (16 * 32 + 1 * 8)(%rsp);
+ movq %r12, (16 * 32 + 2 * 8)(%rsp);
+ movq %r13, (16 * 32 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
vmovdqu (%rcx), %xmm14;
vmovdqu (%r8), %xmm13;
@@ -1369,7 +1385,7 @@ _gcry_camellia_aesni_avx2_ocb_enc:
/* inpack16_pre: */
vpbroadcastq (key_table)(CTX), %ymm15;
- vpshufb .Lpack_bswap RIP, %ymm15, %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
vpxor %ymm0, %ymm15, %ymm0;
vpxor %ymm1, %ymm15, %ymm1;
vpxor %ymm2, %ymm15, %ymm2;
@@ -1412,13 +1428,19 @@ _gcry_camellia_aesni_avx2_ocb_enc:
vzeroall;
- movq (16 * 32 + 0 * 8)(%rax), %r10;
- movq (16 * 32 + 1 * 8)(%rax), %r11;
- movq (16 * 32 + 2 * 8)(%rax), %r12;
- movq (16 * 32 + 3 * 8)(%rax), %r13;
+ movq (16 * 32 + 0 * 8)(%rsp), %r10;
+ movq (16 * 32 + 1 * 8)(%rsp), %r11;
+ movq (16 * 32 + 2 * 8)(%rsp), %r12;
+ movq (16 * 32 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx2_ocb_enc,.-_gcry_camellia_aesni_avx2_ocb_enc;)
.align 8
@@ -1434,9 +1456,12 @@ _gcry_camellia_aesni_avx2_ocb_dec:
* %r8 : checksum
* %r9 : L pointers (void *L[32])
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1444,10 +1469,14 @@ _gcry_camellia_aesni_avx2_ocb_dec:
andq $~63, %rsp;
movq %rsp, %rax;
- movq %r10, (16 * 32 + 0 * 8)(%rax);
- movq %r11, (16 * 32 + 1 * 8)(%rax);
- movq %r12, (16 * 32 + 2 * 8)(%rax);
- movq %r13, (16 * 32 + 3 * 8)(%rax);
+ movq %r10, (16 * 32 + 0 * 8)(%rsp);
+ movq %r11, (16 * 32 + 1 * 8)(%rsp);
+ movq %r12, (16 * 32 + 2 * 8)(%rsp);
+ movq %r13, (16 * 32 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
vmovdqu (%rcx), %xmm14;
@@ -1525,7 +1554,7 @@ _gcry_camellia_aesni_avx2_ocb_dec:
/* inpack16_pre: */
vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
- vpshufb .Lpack_bswap RIP, %ymm15, %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
vpxor %ymm0, %ymm15, %ymm0;
vpxor %ymm1, %ymm15, %ymm1;
vpxor %ymm2, %ymm15, %ymm2;
@@ -1596,13 +1625,19 @@ _gcry_camellia_aesni_avx2_ocb_dec:
vzeroall;
- movq (16 * 32 + 0 * 8)(%rax), %r10;
- movq (16 * 32 + 1 * 8)(%rax), %r11;
- movq (16 * 32 + 2 * 8)(%rax), %r12;
- movq (16 * 32 + 3 * 8)(%rax), %r13;
+ movq (16 * 32 + 0 * 8)(%rsp), %r10;
+ movq (16 * 32 + 1 * 8)(%rsp), %r11;
+ movq (16 * 32 + 2 * 8)(%rsp), %r12;
+ movq (16 * 32 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx2_ocb_dec,.-_gcry_camellia_aesni_avx2_ocb_dec;)
.align 8
@@ -1617,9 +1652,12 @@ _gcry_camellia_aesni_avx2_ocb_auth:
* %rcx: checksum
* %r8 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -1627,10 +1665,14 @@ _gcry_camellia_aesni_avx2_ocb_auth:
andq $~63, %rsp;
movq %rsp, %rax;
- movq %r10, (16 * 32 + 0 * 8)(%rax);
- movq %r11, (16 * 32 + 1 * 8)(%rax);
- movq %r12, (16 * 32 + 2 * 8)(%rax);
- movq %r13, (16 * 32 + 3 * 8)(%rax);
+ movq %r10, (16 * 32 + 0 * 8)(%rsp);
+ movq %r11, (16 * 32 + 1 * 8)(%rsp);
+ movq %r12, (16 * 32 + 2 * 8)(%rsp);
+ movq %r13, (16 * 32 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
vmovdqu (%rdx), %xmm14;
@@ -1703,7 +1745,7 @@ _gcry_camellia_aesni_avx2_ocb_auth:
/* inpack16_pre: */
vpbroadcastq (key_table)(CTX), %ymm15;
- vpshufb .Lpack_bswap RIP, %ymm15, %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
vpxor %ymm0, %ymm15, %ymm0;
vpxor %ymm1, %ymm15, %ymm1;
vpxor %ymm2, %ymm15, %ymm2;
@@ -1749,13 +1791,19 @@ _gcry_camellia_aesni_avx2_ocb_auth:
vzeroall;
- movq (16 * 32 + 0 * 8)(%rax), %r10;
- movq (16 * 32 + 1 * 8)(%rax), %r11;
- movq (16 * 32 + 2 * 8)(%rax), %r12;
- movq (16 * 32 + 3 * 8)(%rax), %r13;
+ movq (16 * 32 + 0 * 8)(%rsp), %r10;
+ movq (16 * 32 + 1 * 8)(%rsp), %r11;
+ movq (16 * 32 + 2 * 8)(%rsp), %r12;
+ movq (16 * 32 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx2_ocb_auth,.-_gcry_camellia_aesni_avx2_ocb_auth;)
#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/
diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index 1a1d43fd..82f67890 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -183,10 +183,13 @@ _gcry_cast5_amd64_encrypt_block:
* %rsi: dst
* %rdx: src
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
movq %rsi, %r10;
@@ -211,10 +214,13 @@ _gcry_cast5_amd64_encrypt_block:
write_block();
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
.align 8
@@ -227,10 +233,13 @@ _gcry_cast5_amd64_decrypt_block:
* %rsi: dst
* %rdx: src
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
movq %rsi, %r10;
@@ -255,10 +264,13 @@ _gcry_cast5_amd64_decrypt_block:
write_block();
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
/**********************************************************************
@@ -371,6 +383,7 @@ __cast5_enc_blk4:
* output:
* RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks
*/
+ CFI_STARTPROC();
GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
get_round_km(0, RKM0d);
@@ -387,6 +400,7 @@ __cast5_enc_blk4:
outbswap_block4(RLR0, RLR1, RLR2, RLR3);
ret;
+ CFI_ENDPROC();
ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
.align 8
@@ -399,6 +413,7 @@ __cast5_dec_blk4:
* output:
* RLR0,RLR1,RLR2,RLR3: four output plaintext blocks
*/
+ CFI_STARTPROC();
GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
inbswap_block4(RLR0, RLR1, RLR2, RLR3);
@@ -416,6 +431,7 @@ __cast5_dec_blk4:
round_dec_last4(1, F4_2, F4_1);
outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+ CFI_ENDPROC();
ret;
ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
@@ -425,20 +441,28 @@ ELF(.type _gcry_cast5_amd64_ctr_enc,@function;)
_gcry_cast5_amd64_ctr_enc:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst (8 blocks)
- * %rdx: src (8 blocks)
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
* %rcx: iv (big endian, 64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
pushq %r14;
+ CFI_PUSH(%r14);
pushq %rsi;
+ CFI_PUSH(%rsi);
pushq %rdx;
+ CFI_PUSH(%rdx);
/* load IV and byteswap */
movq (%rcx), RX0;
@@ -458,7 +482,9 @@ _gcry_cast5_amd64_ctr_enc:
call __cast5_enc_blk4;
popq %r14; /*src*/
+ CFI_POP_TMP_REG();
popq %r13; /*dst*/
+ CFI_POP_TMP_REG();
/* XOR key-stream with plaintext */
xorq 0 * 8(%r14), RLR0;
@@ -471,13 +497,19 @@ _gcry_cast5_amd64_ctr_enc:
movq RLR3, 3 * 8(%r13);
popq %r14;
+ CFI_POP(%r14);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret
+ CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
.align 8
@@ -486,21 +518,30 @@ ELF(.type _gcry_cast5_amd64_cbc_dec,@function;)
_gcry_cast5_amd64_cbc_dec:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst (8 blocks)
- * %rdx: src (8 blocks)
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
* %rcx: iv (64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
pushq %r14;
+ CFI_PUSH(%r14);
pushq %rcx;
+ CFI_PUSH(%rcx);
pushq %rsi;
+ CFI_PUSH(%rsi);
pushq %rdx;
+ CFI_PUSH(%rdx);
/* load input */
movq 0 * 8(%rdx), RLR0;
@@ -511,8 +552,11 @@ _gcry_cast5_amd64_cbc_dec:
call __cast5_dec_blk4;
popq RX0; /*src*/
+ CFI_POP_TMP_REG();
popq RX1; /*dst*/
+ CFI_POP_TMP_REG();
popq RX2; /*iv*/
+ CFI_POP_TMP_REG();
movq 3 * 8(RX0), %r14;
xorq (RX2), RLR0;
@@ -527,14 +571,19 @@ _gcry_cast5_amd64_cbc_dec:
movq RLR3, 3 * 8(RX1);
popq %r14;
+ CFI_POP(%r14);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
-
+ CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
.align 8
@@ -543,20 +592,28 @@ ELF(.type _gcry_cast5_amd64_cfb_dec,@function;)
_gcry_cast5_amd64_cfb_dec:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst (8 blocks)
- * %rdx: src (8 blocks)
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
* %rcx: iv (64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
pushq %r14;
+ CFI_PUSH(%r14);
pushq %rsi;
+ CFI_PUSH(%rsi);
pushq %rdx;
+ CFI_PUSH(%rdx);
/* Load input */
movq (%rcx), RLR0;
@@ -573,7 +630,9 @@ _gcry_cast5_amd64_cfb_dec:
call __cast5_enc_blk4;
popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
popq %rcx; /*dst*/
+ CFI_POP_TMP_REG();
xorq 0 * 8(%rdx), RLR0;
xorq 1 * 8(%rdx), RLR1;
@@ -585,14 +644,19 @@ _gcry_cast5_amd64_cfb_dec:
movq RLR3, 3 * 8(%rcx);
popq %r14;
+ CFI_POP(%r14);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
-
+ CFI_ENDPROC();
ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
#endif /*defined(USE_CAST5)*/
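One further detail appears in the CTR/CBC/CFB helpers above (and again in the 3DES code below): argument registers such as %rsi and %rdx are pushed only as temporaries and later popped into different registers, so the epilogue uses CFI_POP_TMP_REG(), which rewinds the CFA offset by 8 without marking any callee-saved register as restored. A hypothetical fragment in the same style (the helper name is made up):

	pushq %rsi;			/* spill dst pointer */
	CFI_PUSH(%rsi);
	pushq %rdx;			/* spill src pointer */
	CFI_PUSH(%rdx);

	call __hypothetical_enc_blk4;	/* block function clobbers %rsi/%rdx */

	popq %r14;			/* src pointer comes back in %r14 */
	CFI_POP_TMP_REG();		/* CFA -8 only; no register restore recorded */
	popq %r13;			/* dst pointer comes back in %r13 */
	CFI_POP_TMP_REG();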
diff --git a/cipher/chacha20-amd64-avx2.S b/cipher/chacha20-amd64-avx2.S
index 94c8e8cf..de6263b6 100644
--- a/cipher/chacha20-amd64-avx2.S
+++ b/cipher/chacha20-amd64-avx2.S
@@ -179,11 +179,14 @@ _gcry_chacha20_amd64_avx2_blocks8:
* %rdx: src
* %rcx: nblks (multiple of 8)
*/
+ CFI_STARTPROC();
vzeroupper;
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
subq $STACK_MAX, %rsp;
andq $~31, %rsp;
@@ -318,7 +321,9 @@ _gcry_chacha20_amd64_avx2_blocks8:
/* eax zeroed by round loop. */
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_chacha20_amd64_avx2_blocks8,
.-_gcry_chacha20_amd64_avx2_blocks8;)
@@ -339,9 +344,12 @@ _gcry_chacha20_poly1305_amd64_avx2_blocks8:
* %r9: poly1305-state
* %r8: poly1305-src
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
vzeroupper;
@@ -353,6 +361,11 @@ _gcry_chacha20_poly1305_amd64_avx2_blocks8:
movq %r13, (STACK_MAX + 2 * 8)(%rsp);
movq %r14, (STACK_MAX + 3 * 8)(%rsp);
movq %r15, (STACK_MAX + 4 * 8)(%rsp);
+ CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8);
+ CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8);
+ CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8);
+ CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8);
+ CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8);
movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC
movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST
@@ -752,10 +765,17 @@ _gcry_chacha20_poly1305_amd64_avx2_blocks8:
movq (STACK_MAX + 2 * 8)(%rsp), %r13;
movq (STACK_MAX + 3 * 8)(%rsp), %r14;
movq (STACK_MAX + 4 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
xorl %eax, %eax;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_chacha20_poly1305_amd64_avx2_blocks8,
.-_gcry_chacha20_poly1305_amd64_avx2_blocks8;)
diff --git a/cipher/chacha20-amd64-ssse3.S b/cipher/chacha20-amd64-ssse3.S
index 1657f771..6bbf12fc 100644
--- a/cipher/chacha20-amd64-ssse3.S
+++ b/cipher/chacha20-amd64-ssse3.S
@@ -175,9 +175,12 @@ _gcry_chacha20_amd64_ssse3_blocks4:
* %rdx: src
* %rcx: nblks (multiple of 4)
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
subq $STACK_MAX, %rsp;
andq $~15, %rsp;
@@ -329,7 +332,9 @@ _gcry_chacha20_amd64_ssse3_blocks4:
/* eax zeroed by round loop. */
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_chacha20_amd64_ssse3_blocks4,
.-_gcry_chacha20_amd64_ssse3_blocks4;)
@@ -372,6 +377,7 @@ _gcry_chacha20_amd64_ssse3_blocks1:
* %rdx: src
* %rcx: nblks
*/
+ CFI_STARTPROC();
/* Load constants */
movdqa .Lcounter1 rRIP, X4;
@@ -497,6 +503,7 @@ _gcry_chacha20_amd64_ssse3_blocks1:
/* eax zeroed by round loop. */
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_chacha20_amd64_ssse3_blocks1,
.-_gcry_chacha20_amd64_ssse3_blocks1;)
@@ -517,9 +524,12 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks4:
* %r9: poly1305-state
* %r8: poly1305-src
*/
+ CFI_STARTPROC();
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
subq $(8 * 8) + STACK_MAX + 16, %rsp;
andq $~15, %rsp;
@@ -529,6 +539,11 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks4:
movq %r13, (STACK_MAX + 2 * 8)(%rsp);
movq %r14, (STACK_MAX + 3 * 8)(%rsp);
movq %r15, (STACK_MAX + 4 * 8)(%rsp);
+ CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8);
+ CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8);
+ CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8);
+ CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8);
+ CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8);
movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC
movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST
@@ -901,10 +916,17 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks4:
movq (STACK_MAX + 2 * 8)(%rsp), %r13;
movq (STACK_MAX + 3 * 8)(%rsp), %r14;
movq (STACK_MAX + 4 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
xorl %eax, %eax;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4,
.-_gcry_chacha20_poly1305_amd64_ssse3_blocks4;)
@@ -925,8 +947,12 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks1:
* %r9: poly1305-state
* %r8: poly1305-src
*/
+ CFI_STARTPROC();
+
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
subq $(8 * 8), %rsp;
movq %rbx, (0 * 8)(%rsp);
@@ -934,6 +960,11 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks1:
movq %r13, (2 * 8)(%rsp);
movq %r14, (3 * 8)(%rsp);
movq %r15, (4 * 8)(%rsp);
+ CFI_REG_ON_STACK(rbx, 0 * 8);
+ CFI_REG_ON_STACK(r12, 1 * 8);
+ CFI_REG_ON_STACK(r13, 2 * 8);
+ CFI_REG_ON_STACK(r14, 3 * 8);
+ CFI_REG_ON_STACK(r15, 4 * 8);
movq %rdx, (5 * 8)(%rsp); # SRC
movq %rsi, (6 * 8)(%rsp); # DST
@@ -1206,10 +1237,17 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks1:
movq (2 * 8)(%rsp), %r13;
movq (3 * 8)(%rsp), %r14;
movq (4 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
xorl %eax, %eax;
leave;
+ CFI_LEAVE();
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks1,
.-_gcry_chacha20_poly1305_amd64_ssse3_blocks1;)
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index f25573d9..a211dac3 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
@@ -190,15 +190,23 @@ _gcry_3des_amd64_crypt_block:
* %rsi: dst
* %rdx: src
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
pushq %r14;
+ CFI_PUSH(%r14);
pushq %r15;
+ CFI_PUSH(%r15);
pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
leaq .L_s1 rRIP, SBOXES;
@@ -259,18 +267,26 @@ _gcry_3des_amd64_crypt_block:
round1(32+15, RL0, RR0, dummy2);
popq RW2; /*dst*/
+ CFI_POP_TMP_REG();
final_permutation(RR0, RL0);
write_block(RW2, RR0, RL0);
popq %r15;
+ CFI_POP(%r15);
popq %r14;
+ CFI_POP(%r14);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
/***********************************************************************
@@ -465,6 +481,7 @@ _gcry_3des_amd64_crypt_blk3:
* RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks
* RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks
*/
+ CFI_STARTPROC();
leaq .L_s1 rRIP, SBOXES;
@@ -528,6 +545,7 @@ _gcry_3des_amd64_crypt_blk3:
final_permutation3(RR, RL);
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
.align 8
@@ -540,18 +558,28 @@ _gcry_3des_amd64_cbc_dec:
* %rdx: src (3 blocks)
* %rcx: iv (64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
pushq %r14;
+ CFI_PUSH(%r14);
pushq %r15;
+ CFI_PUSH(%r15);
pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
pushq %rdx; /*src*/
+ CFI_PUSH(%rdx);
pushq %rcx; /*iv*/
+ CFI_PUSH(%rcx);
/* load input */
movl 0 * 4(%rdx), RL0d;
@@ -571,8 +599,11 @@ _gcry_3des_amd64_cbc_dec:
call _gcry_3des_amd64_crypt_blk3;
popq %rcx; /*iv*/
+ CFI_POP_TMP_REG();
popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
popq %rsi; /*dst*/
+ CFI_POP_TMP_REG();
bswapl RR0d;
bswapl RL0d;
@@ -598,14 +629,21 @@ _gcry_3des_amd64_cbc_dec:
movl RL2d, 5 * 4(%rsi);
popq %r15;
+ CFI_POP(%r15);
popq %r14;
+ CFI_POP(%r14);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
.align 8
@@ -618,17 +656,26 @@ _gcry_3des_amd64_ctr_enc:
* %rdx: src (3 blocks)
* %rcx: iv (64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
pushq %r14;
+ CFI_PUSH(%r14);
pushq %r15;
+ CFI_PUSH(%r15);
pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
pushq %rdx; /*src*/
+ CFI_PUSH(%rdx);
movq %rcx, RW2;
/* load IV and byteswap */
@@ -654,7 +701,9 @@ _gcry_3des_amd64_ctr_enc:
call _gcry_3des_amd64_crypt_blk3;
popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
popq %rsi; /*dst*/
+ CFI_POP_TMP_REG();
bswapl RR0d;
bswapl RL0d;
@@ -678,14 +727,21 @@ _gcry_3des_amd64_ctr_enc:
movl RL2d, 5 * 4(%rsi);
popq %r15;
+ CFI_POP(%r15);
popq %r14;
+ CFI_POP(%r14);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
.align 8
@@ -698,17 +754,26 @@ _gcry_3des_amd64_cfb_dec:
* %rdx: src (3 blocks)
* %rcx: iv (64bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %r12;
+ CFI_PUSH(%r12);
pushq %r13;
+ CFI_PUSH(%r13);
pushq %r14;
+ CFI_PUSH(%r14);
pushq %r15;
+ CFI_PUSH(%r15);
pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
pushq %rdx; /*src*/
+ CFI_PUSH(%rdx);
movq %rcx, RW2;
/* Load input */
@@ -733,7 +798,9 @@ _gcry_3des_amd64_cfb_dec:
call _gcry_3des_amd64_crypt_blk3;
popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
popq %rsi; /*dst*/
+ CFI_POP_TMP_REG();
bswapl RR0d;
bswapl RL0d;
@@ -757,14 +824,21 @@ _gcry_3des_amd64_cfb_dec:
movl RL2d, 5 * 4(%rsi);
popq %r15;
+ CFI_POP(%r15);
popq %r14;
+ CFI_POP(%r14);
popq %r13;
+ CFI_POP(%r13);
popq %r12;
+ CFI_POP(%r12);
popq %rbx;
+ CFI_POP(%rbx);
popq %rbp;
+ CFI_POP(%rbp);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
.align 16
diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S
index 798ff51a..3dcaa856 100644
--- a/cipher/rijndael-amd64.S
+++ b/cipher/rijndael-amd64.S
@@ -212,14 +212,19 @@ _gcry_aes_amd64_encrypt_block:
* %ecx: number of rounds.. 10, 12 or 14
* %r8: encryption tables
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_5
subq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(5 * 8);
movq %rsi, (0 * 8)(%rsp);
movl %ecx, (1 * 8)(%rsp);
movq %rbp, (2 * 8)(%rsp);
movq %rbx, (3 * 8)(%rsp);
movq %r12, (4 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 2 * 8);
+ CFI_REL_OFFSET(%rbx, 3 * 8);
+ CFI_REL_OFFSET(%r12, 4 * 8);
leaq (%r8), RTAB;
@@ -251,16 +256,23 @@ _gcry_aes_amd64_encrypt_block:
movl RCd, 2 * 4(%rsi);
movl RDd, 3 * 4(%rsi);
+ CFI_REMEMBER_STATE();
+
movq (4 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %rbx;
movq (2 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
addq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-5 * 8);
movl $(6 * 8), %eax;
EXIT_SYSV_FUNC
ret;
+ CFI_RESTORE_STATE();
.align 4
.Lenc_not_128:
je .Lenc_192
@@ -280,6 +292,7 @@ _gcry_aes_amd64_encrypt_block:
lastencround(11);
jmp .Lenc_done;
+ CFI_ENDPROC();
ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
#define do_decround(next_r) \
@@ -376,14 +389,19 @@ _gcry_aes_amd64_decrypt_block:
* %ecx: number of rounds.. 10, 12 or 14
* %r8: decryption tables
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_5
subq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(5 * 8);
movq %rsi, (0 * 8)(%rsp);
movl %ecx, (1 * 8)(%rsp);
movq %rbp, (2 * 8)(%rsp);
movq %rbx, (3 * 8)(%rsp);
movq %r12, (4 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 2 * 8);
+ CFI_REL_OFFSET(%rbx, 3 * 8);
+ CFI_REL_OFFSET(%r12, 4 * 8);
leaq (%r8), RTAB;
@@ -416,16 +434,23 @@ _gcry_aes_amd64_decrypt_block:
movl RCd, 2 * 4(%rsi);
movl RDd, 3 * 4(%rsi);
+ CFI_REMEMBER_STATE();
+
movq (4 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %rbx;
movq (2 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
addq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-5 * 8);
movl $(6 * 8), %eax;
EXIT_SYSV_FUNC
ret;
+ CFI_RESTORE_STATE();
.align 4
.Ldec_256:
je .Ldec_192;
@@ -445,6 +470,7 @@ _gcry_aes_amd64_decrypt_block:
decround(9);
jmp .Ldec_tail;
+ CFI_ENDPROC();
ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;)
#endif /*USE_AES*/
diff --git a/cipher/rijndael-ssse3-amd64-asm.S b/cipher/rijndael-ssse3-amd64-asm.S
index ffce5df2..8124eb21 100644
--- a/cipher/rijndael-ssse3-amd64-asm.S
+++ b/cipher/rijndael-ssse3-amd64-asm.S
@@ -50,6 +50,7 @@
ELF(.type _gcry_aes_ssse3_enc_preload,@function)
.globl _gcry_aes_ssse3_enc_preload
_gcry_aes_ssse3_enc_preload:
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
lea .Laes_consts(%rip), %rax
movdqa (%rax), %xmm9 # 0F
@@ -61,6 +62,7 @@ _gcry_aes_ssse3_enc_preload:
movdqa .Lk_sb2+16(%rax), %xmm14 # sb2t
EXIT_SYSV_FUNC
ret
+ CFI_ENDPROC();
ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
##
@@ -69,6 +71,7 @@ ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
ELF(.type _gcry_aes_ssse3_dec_preload,@function)
.globl _gcry_aes_ssse3_dec_preload
_gcry_aes_ssse3_dec_preload:
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
lea .Laes_consts(%rip), %rax
movdqa (%rax), %xmm9 # 0F
@@ -81,6 +84,7 @@ _gcry_aes_ssse3_dec_preload:
movdqa .Lk_dsbe (%rax), %xmm8 # sbeu
EXIT_SYSV_FUNC
ret
+ CFI_ENDPROC();
ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload)
##
@@ -111,6 +115,7 @@ ELF(.type _gcry_aes_ssse3_encrypt_core,@function)
.globl _gcry_aes_ssse3_encrypt_core
_gcry_aes_ssse3_encrypt_core:
_aes_encrypt_core:
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
mov %rdi, %rdx
leaq -1(%rsi), %rax
@@ -190,6 +195,7 @@ _aes_encrypt_core:
pshufb .Lk_sr(%rsi,%rcx), %xmm0
EXIT_SYSV_FUNC
ret
+ CFI_ENDPROC();
ELF(.size _aes_encrypt_core,.-_aes_encrypt_core)
##
@@ -202,6 +208,7 @@ ELF(.size _aes_encrypt_core,.-_aes_encrypt_core)
ELF(.type _gcry_aes_ssse3_decrypt_core,@function)
_gcry_aes_ssse3_decrypt_core:
_aes_decrypt_core:
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
mov %rdi, %rdx
lea .Laes_consts(%rip), %rcx
@@ -297,6 +304,7 @@ _aes_decrypt_core:
pshufb .Lk_sr(%rsi,%rcx), %xmm0
EXIT_SYSV_FUNC
ret
+ CFI_ENDPROC();
ELF(.size _aes_decrypt_core,.-_aes_decrypt_core)
########################################################
@@ -315,6 +323,7 @@ _aes_schedule_core:
# rdx = buffer
# rcx = direction. 0=encrypt, 1=decrypt
# r8 = rotoffs
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_5
# load the tables
@@ -671,6 +680,7 @@ _aes_schedule_core:
pxor %xmm8, %xmm8
EXIT_SYSV_FUNC
ret
+ CFI_ENDPROC();
ELF(.size _gcry_aes_ssse3_schedule_core,.-_gcry_aes_ssse3_schedule_core)
########################################################
diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index 470c32aa..ae8f2715 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
@@ -28,11 +28,7 @@
#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.text
@@ -40,6 +36,7 @@
.globl _gcry_salsa20_amd64_keysetup
ELF(.type _gcry_salsa20_amd64_keysetup,@function;)
_gcry_salsa20_amd64_keysetup:
+ CFI_STARTPROC();
movl 0(%rsi),%r8d
movl 4(%rsi),%r9d
movl 8(%rsi),%eax
@@ -87,11 +84,13 @@ _gcry_salsa20_amd64_keysetup:
movl %r8d,12(%rdi)
.L_keysetupdone:
ret
+ CFI_ENDPROC();
.align 8
.globl _gcry_salsa20_amd64_ivsetup
ELF(.type _gcry_salsa20_amd64_ivsetup,@function;)
_gcry_salsa20_amd64_ivsetup:
+ CFI_STARTPROC();
movl 0(%rsi),%r8d
movl 4(%rsi),%esi
mov $0,%r9
@@ -101,6 +100,7 @@ _gcry_salsa20_amd64_ivsetup:
movl %r9d,32(%rdi)
movl %eax,52(%rdi)
ret
+ CFI_ENDPROC();
.align 8
.globl _gcry_salsa20_amd64_encrypt_blocks
@@ -112,13 +112,15 @@ _gcry_salsa20_amd64_encrypt_blocks:
* - Length is input as number of blocks, so don't handle tail bytes
* (this is done in salsa20.c).
*/
+ CFI_STARTPROC();
push %rbx
+ CFI_PUSH(%rbx);
shlq $6, %rcx /* blocks to bytes */
mov %r8, %rbx
mov %rsp,%r11
- and $31,%r11
- add $384,%r11
- sub %r11,%rsp
+ CFI_DEF_CFA_REGISTER(%r11);
+ sub $384,%rsp
+ and $~31,%rsp
mov %rdi,%r8
mov %rsi,%rsi
mov %rdx,%rdi
@@ -916,15 +918,22 @@ _gcry_salsa20_amd64_encrypt_blocks:
cmp $64,%rdx
ja .L_bytes_are_128_or_192
.L_done:
- add %r11,%rsp
+ CFI_REMEMBER_STATE();
mov %r11,%rax
+ sub %rsp,%rax
+ mov %r11,%rsp
+ CFI_REGISTER(%r11, %rsp)
+ CFI_DEF_CFA_REGISTER(%rsp)
pop %rbx
+ CFI_POP(%rbx)
ret
+ CFI_RESTORE_STATE();
.L_bytes_are_128_or_192:
sub $64,%rdx
add $64,%rdi
add $64,%rsi
jmp .L_bytes_are_64_128_or_192
+ CFI_ENDPROC();
ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
#endif /*defined(USE_SALSA20)*/
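
salsa20, and the SHA-1 transforms that follow, realign %rsp after saving the original stack pointer in a spare register, so the CFA has to be re-based onto that register while %rsp is unpredictable. A minimal sketch of the idiom, illustrative only and not part of the patch; the scratch register choice is arbitrary:

example_aligned_frame:
	CFI_STARTPROC();
	push %rbx;
	CFI_PUSH(%rbx);
	mov %rsp, %r11;			/* keep the original stack pointer */
	CFI_DEF_CFA_REGISTER(%r11);	/* CFA is computed from %r11 from here on */
	sub $384, %rsp;
	and $~31, %rsp;			/* realign; the CFA is unaffected */

	/* ... body using the aligned scratch area ... */

	mov %r11, %rsp;			/* undo the realignment */
	CFI_DEF_CFA_REGISTER(%rsp);	/* CFA tracked through %rsp again */
	pop %rbx;
	CFI_POP(%rbx);
	ret;
	CFI_ENDPROC();

salsa20 additionally brackets its epilogue with CFI_REMEMBER_STATE()/CFI_RESTORE_STATE(), since the .L_bytes_are_128_or_192 tail sits after the ret with the frame still live.
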
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 8d60a159..9b17c2bd 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -24,17 +24,7 @@
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \
defined(ENABLE_AVX2_SUPPORT)
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
-
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
+#include "asm-common-amd64.h"
/* struct serpent_context: */
#define ctx_keys 0
@@ -421,6 +411,7 @@ __serpent_enc_blk16:
* RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: sixteen parallel
* ciphertext blocks
*/
+ CFI_STARTPROC();
vpcmpeqd RNOT, RNOT, RNOT;
@@ -496,6 +487,7 @@ __serpent_enc_blk16:
transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
ret;
+ CFI_ENDPROC();
ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
.align 8
@@ -509,6 +501,7 @@ __serpent_dec_blk16:
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
* plaintext blocks
*/
+ CFI_STARTPROC();
vpcmpeqd RNOT, RNOT, RNOT;
@@ -586,6 +579,7 @@ __serpent_dec_blk16:
transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
ret;
+ CFI_ENDPROC();
ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
#define inc_le128(x, minus_one, tmp) \
@@ -604,13 +598,14 @@ _gcry_serpent_avx2_ctr_enc:
* %rdx: src (16 blocks)
* %rcx: iv (big endian, 128bit)
*/
+ CFI_STARTPROC();
movq 8(%rcx), %rax;
bswapq %rax;
vzeroupper;
- vbroadcasti128 .Lbswap128_mask RIP, RTMP3;
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
vpcmpeqd RNOT, RNOT, RNOT;
vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
@@ -701,7 +696,8 @@ _gcry_serpent_avx2_ctr_enc:
vzeroall;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
.align 8
@@ -714,6 +710,7 @@ _gcry_serpent_avx2_cbc_dec:
* %rdx: src (16 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -752,7 +749,8 @@ _gcry_serpent_avx2_cbc_dec:
vzeroall;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
.align 8
@@ -765,6 +763,7 @@ _gcry_serpent_avx2_cfb_dec:
* %rdx: src (16 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -805,7 +804,8 @@ _gcry_serpent_avx2_cfb_dec:
vzeroall;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
.align 8
@@ -821,15 +821,21 @@ _gcry_serpent_avx2_ocb_enc:
* %r8 : checksum
* %r9 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
vzeroupper;
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
vmovdqu (%rcx), RTMP0x;
vmovdqu (%r8), RTMP1x;
@@ -882,10 +888,15 @@ _gcry_serpent_avx2_ocb_enc:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __serpent_enc_blk16;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
vpxor (0 * 32)(%rsi), RA4, RA4;
vpxor (1 * 32)(%rsi), RA1, RA1;
@@ -908,6 +919,7 @@ _gcry_serpent_avx2_ocb_enc:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;)
.align 8
@@ -923,15 +935,21 @@ _gcry_serpent_avx2_ocb_dec:
* %r8 : checksum
* %r9 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
vzeroupper;
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
vmovdqu (%rcx), RTMP0x;
@@ -978,10 +996,15 @@ _gcry_serpent_avx2_ocb_dec:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __serpent_dec_blk16;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
vmovdqu (%r8), RTMP1x;
@@ -1020,6 +1043,7 @@ _gcry_serpent_avx2_ocb_dec:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;)
.align 8
@@ -1034,15 +1058,21 @@ _gcry_serpent_avx2_ocb_auth:
* %rcx: checksum
* %r8 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
vzeroupper;
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
vmovdqu (%rdx), RTMP0x;
@@ -1088,10 +1118,15 @@ _gcry_serpent_avx2_ocb_auth:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __serpent_enc_blk16;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
vpxor RA4, RB4, RA4;
vpxor RA1, RB1, RA1;
@@ -1111,6 +1146,7 @@ _gcry_serpent_avx2_ocb_auth:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;)
.align 16
diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S
index b149af24..39cba002 100644
--- a/cipher/serpent-sse2-amd64.S
+++ b/cipher/serpent-sse2-amd64.S
@@ -23,17 +23,7 @@
#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT)
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
-
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
+#include "asm-common-amd64.h"
/* struct serpent_context: */
#define ctx_keys 0
@@ -444,6 +434,7 @@ __serpent_enc_blk8:
* RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel
* ciphertext blocks
*/
+ CFI_STARTPROC();
pcmpeqd RNOT, RNOT;
@@ -519,6 +510,7 @@ __serpent_enc_blk8:
transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
ret;
+ CFI_ENDPROC();
ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
.align 8
@@ -532,6 +524,7 @@ __serpent_dec_blk8:
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
* blocks
*/
+ CFI_STARTPROC();
pcmpeqd RNOT, RNOT;
@@ -609,6 +602,7 @@ __serpent_dec_blk8:
transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
ret;
+ CFI_ENDPROC();
ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
.align 8
@@ -621,6 +615,7 @@ _gcry_serpent_sse2_ctr_enc:
* %rdx: src (8 blocks)
* %rcx: iv (big endian, 128bit)
*/
+ CFI_STARTPROC();
/* load IV and byteswap */
movdqu (%rcx), RA0;
@@ -738,7 +733,8 @@ _gcry_serpent_sse2_ctr_enc:
pxor RTMP2, RTMP2;
pxor RNOT, RNOT;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
.align 8
@@ -751,6 +747,7 @@ _gcry_serpent_sse2_cbc_dec:
* %rdx: src (8 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
movdqu (0 * 16)(%rdx), RA0;
movdqu (1 * 16)(%rdx), RA1;
@@ -799,7 +796,8 @@ _gcry_serpent_sse2_cbc_dec:
pxor RTMP2, RTMP2;
pxor RNOT, RNOT;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
.align 8
@@ -812,6 +810,7 @@ _gcry_serpent_sse2_cfb_dec:
* %rdx: src (8 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
/* Load input */
movdqu (%rcx), RA0;
@@ -863,7 +862,8 @@ _gcry_serpent_sse2_cfb_dec:
pxor RTMP2, RTMP2;
pxor RNOT, RNOT;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
.align 8
@@ -879,13 +879,19 @@ _gcry_serpent_sse2_ocb_enc:
* %r8 : checksum
* %r9 : L pointers (void *L[8])
*/
+ CFI_STARTPROC();
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
movdqu (%rcx), RTMP0;
movdqu (%r8), RTMP1;
@@ -926,10 +932,15 @@ _gcry_serpent_sse2_ocb_enc:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __serpent_enc_blk8;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
pxor_u((0 * 16)(%rsi), RA4, RTMP0);
pxor_u((1 * 16)(%rsi), RA1, RTMP0);
@@ -966,6 +977,7 @@ _gcry_serpent_sse2_ocb_enc:
pxor RNOT, RNOT;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;)
.align 8
@@ -981,13 +993,19 @@ _gcry_serpent_sse2_ocb_dec:
* %r8 : checksum
* %r9 : L pointers (void *L[8])
*/
+ CFI_STARTPROC();
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
movdqu (%rcx), RTMP0;
@@ -1024,10 +1042,15 @@ _gcry_serpent_sse2_ocb_dec:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __serpent_dec_blk8;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
movdqu (%r8), RTMP0;
@@ -1078,6 +1101,7 @@ _gcry_serpent_sse2_ocb_dec:
pxor RNOT, RNOT;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;)
.align 8
@@ -1092,13 +1116,19 @@ _gcry_serpent_sse2_ocb_auth:
* %rcx: checksum
* %r8 : L pointers (void *L[8])
*/
+ CFI_STARTPROC();
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
movdqu (%rdx), RTMP0;
@@ -1134,10 +1164,15 @@ _gcry_serpent_sse2_ocb_auth:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __serpent_enc_blk8;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
movdqu (%rcx), RTMP0;
pxor RB4, RA4;
@@ -1169,6 +1204,7 @@ _gcry_serpent_sse2_ocb_auth:
pxor RNOT, RNOT;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_serpent_sse2_ocb_auth,.-_gcry_serpent_sse2_ocb_auth;)
#endif /*defined(USE_SERPENT)*/
diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S
index 5d674c15..85876ad4 100644
--- a/cipher/sha1-avx-amd64.S
+++ b/cipher/sha1-avx-amd64.S
@@ -33,18 +33,7 @@
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
-
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
/* Context structure */
@@ -161,7 +150,7 @@
vpshufb BSWAP_REG, tmp0, W;
#define W_PRECALC_00_15_2(i, W, tmp0) \
- vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0;
+ vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0;
#define W_PRECALC_00_15_3(i, W, tmp0) \
vmovdqa tmp0, WK(i&~3);
@@ -186,7 +175,7 @@
#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
vpxor W, tmp0, tmp0; \
vpxor tmp1, tmp0, W; \
- vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \
+ vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \
vmovdqa tmp0, WK((i)&~3);
#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
@@ -203,7 +192,7 @@
#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
vpor W, tmp0, W; \
- vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \
+ vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \
vmovdqa tmp0, WK((i)&~3);
@@ -223,6 +212,7 @@ _gcry_sha1_transform_amd64_avx:
* %rsi: data (64*nblks bytes)
* %rdx: nblks
*/
+ CFI_STARTPROC();
xorl %eax, %eax;
cmpq $0, %rdx;
@@ -234,9 +224,12 @@ _gcry_sha1_transform_amd64_avx:
movq %rdi, RSTATE;
movq %rsi, RDATA;
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
subq $(16*4), %rsp;
andq $(~31), %rsp;
@@ -248,7 +241,7 @@ _gcry_sha1_transform_amd64_avx:
movl state_h3(RSTATE), d;
movl state_h4(RSTATE), e;
- vmovdqa .Lbswap_shufb_ctl RIP, BSWAP_REG;
+ vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
/* Precalc 0-15. */
W_PRECALC_00_15_0(0, W0, Wtmp0);
@@ -415,15 +408,20 @@ _gcry_sha1_transform_amd64_avx:
movl e, state_h4(RSTATE);
movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
popq %rbp;
+ CFI_POP(%rbp);
popq %rbx;
+ CFI_POP(%rbx);
/* stack already burned */
xorl %eax, %eax;
.Lret:
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_sha1_transform_amd64_avx,
.-_gcry_sha1_transform_amd64_avx;)
diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S
index fe8901ef..5dfcdca9 100644
--- a/cipher/sha1-avx-bmi2-amd64.S
+++ b/cipher/sha1-avx-bmi2-amd64.S
@@ -34,18 +34,7 @@
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
-
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
/* Context structure */
@@ -222,6 +211,7 @@ _gcry_sha1_transform_amd64_avx_bmi2:
* %rsi: data (64*nblks bytes)
* %rdx: nblks
*/
+ CFI_STARTPROC();
xorl %eax, %eax;
cmpq $0, %rdx;
@@ -233,10 +223,14 @@ _gcry_sha1_transform_amd64_avx_bmi2:
movq %rdi, RSTATE;
movq %rsi, RDATA;
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %r12;
+ CFI_PUSH(%r12);
movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
subq $(16*4), %rsp;
andq $(~31), %rsp;
@@ -249,11 +243,11 @@ _gcry_sha1_transform_amd64_avx_bmi2:
movl state_h4(RSTATE), e;
xorl ne, ne;
- vmovdqa .Lbswap_shufb_ctl RIP, BSWAP_REG;
- vpbroadcastd .LK1 RIP, K1;
- vpbroadcastd .LK2 RIP, K2;
- vpbroadcastd .LK3 RIP, K3;
- vpbroadcastd .LK4 RIP, K4;
+ vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+ vpbroadcastd .LK1 rRIP, K1;
+ vpbroadcastd .LK2 rRIP, K2;
+ vpbroadcastd .LK3 rRIP, K3;
+ vpbroadcastd .LK4 rRIP, K4;
/* Precalc 0-15. */
W_PRECALC_00_15_0(0, W0, Wtmp0);
@@ -424,16 +418,22 @@ _gcry_sha1_transform_amd64_avx_bmi2:
movl e, state_h4(RSTATE);
movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
popq %r12;
+ CFI_POP(%r12);
popq %rbp;
+ CFI_POP(%rbp);
popq %rbx;
+ CFI_POP(%rbx);
/* stack already burned */
xorl %eax, %eax;
.Lret:
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_sha1_transform_amd64_avx_bmi2,
.-_gcry_sha1_transform_amd64_avx_bmi2;)
diff --git a/cipher/sha1-avx2-bmi2-amd64.S b/cipher/sha1-avx2-bmi2-amd64.S
index 2a2f21a5..93863230 100644
--- a/cipher/sha1-avx2-bmi2-amd64.S
+++ b/cipher/sha1-avx2-bmi2-amd64.S
@@ -34,18 +34,7 @@
defined(HAVE_GCC_INLINE_ASM_BMI2) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1)
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
-
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
/* Context structure */
@@ -228,6 +217,7 @@ _gcry_sha1_transform_amd64_avx2_bmi2:
* %rsi: data (64*nblks bytes)
* %rdx: nblks (multiple of 2, larger than 0)
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -235,10 +225,14 @@ _gcry_sha1_transform_amd64_avx2_bmi2:
movq %rdi, RSTATE;
movq %rsi, RDATA;
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %rbp;
+ CFI_PUSH(%rbp);
pushq %r12;
+ CFI_PUSH(%r12);
movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
subq $(WK_STACK_WORDS*4), %rsp;
andq $(~63), %rsp;
@@ -251,11 +245,11 @@ _gcry_sha1_transform_amd64_avx2_bmi2:
movl state_h4(RSTATE), e;
xorl ne, ne;
- vbroadcasti128 .Lbswap_shufb_ctl RIP, BSWAP_REG;
- vpbroadcastd .LK1 RIP, K1;
- vpbroadcastd .LK2 RIP, K2;
- vpbroadcastd .LK3 RIP, K3;
- vpbroadcastd .LK4 RIP, K4;
+ vbroadcasti128 .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+ vpbroadcastd .LK1 rRIP, K1;
+ vpbroadcastd .LK2 rRIP, K2;
+ vpbroadcastd .LK3 rRIP, K3;
+ vpbroadcastd .LK4 rRIP, K4;
/* Precalc 0-31 for block 1 & 2. */
W_PRECALC_00_15_0(0, W0, Wtmp0);
@@ -557,15 +551,21 @@ _gcry_sha1_transform_amd64_avx2_bmi2:
movl e, state_h4(RSTATE);
movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
popq %r12;
+ CFI_POP(%r12);
popq %rbp;
+ CFI_POP(%rbp);
popq %rbx;
+ CFI_POP(%rbx);
/* stack already burned */
xorl %eax, %eax;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_sha1_transform_amd64_avx2_bmi2,
.-_gcry_sha1_transform_amd64_avx2_bmi2;)
diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S
index fff14034..7e32b0f4 100644
--- a/cipher/sha1-ssse3-amd64.S
+++ b/cipher/sha1-ssse3-amd64.S
@@ -33,18 +33,7 @@
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
-
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
/* Context structure */
@@ -162,7 +151,7 @@
movdqa tmp0, W;
#define W_PRECALC_00_15_2(i, W, tmp0) \
- paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0;
+ paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0;
#define W_PRECALC_00_15_3(i, W, tmp0) \
movdqa tmp0, WK(i&~3);
@@ -193,7 +182,7 @@
pxor W, tmp0; \
pxor tmp1, tmp0; \
movdqa tmp0, W; \
- paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \
+ paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \
movdqa tmp0, WK((i)&~3);
#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
@@ -213,7 +202,7 @@
#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
movdqa tmp0, W; \
- paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \
+ paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \
movdqa tmp0, WK((i)&~3);
#define CLEAR_REG(reg) pxor reg, reg;
@@ -235,6 +224,7 @@ _gcry_sha1_transform_amd64_ssse3:
* %rsi: data (64*nblks bytes)
* %rdx: nblks
*/
+ CFI_STARTPROC();
xorl %eax, %eax;
cmpq $0, %rdx;
@@ -244,9 +234,12 @@ _gcry_sha1_transform_amd64_ssse3:
movq %rdi, RSTATE;
movq %rsi, RDATA;
pushq %rbx;
+ CFI_PUSH(%rbx);
pushq %rbp;
+ CFI_PUSH(%rbp);
movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
subq $(16*4), %rsp;
andq $(~31), %rsp;
@@ -258,7 +251,7 @@ _gcry_sha1_transform_amd64_ssse3:
movl state_h3(RSTATE), d;
movl state_h4(RSTATE), e;
- movdqa .Lbswap_shufb_ctl RIP, BSWAP_REG;
+ movdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
/* Precalc 0-15. */
W_PRECALC_00_15_0(0, W0, Wtmp0);
@@ -423,15 +416,20 @@ _gcry_sha1_transform_amd64_ssse3:
movl e, state_h4(RSTATE);
movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
popq %rbp;
+ CFI_POP(%rbp);
popq %rbx;
+ CFI_POP(%rbx);
/* stack already burned */
xorl %eax, %eax;
.Lret:
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_sha1_transform_amd64_ssse3,
.-_gcry_sha1_transform_amd64_ssse3;)
diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index b8b01b15..77143ff0 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -59,17 +59,7 @@
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA256)
-#ifdef __PIC__
-# define ADD_RIP +rip
-#else
-# define ADD_RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.intel_syntax noprefix
@@ -380,15 +370,22 @@ rotate_Xs
ELF(.type _gcry_sha256_transform_amd64_avx,@function;)
.align 16
_gcry_sha256_transform_amd64_avx:
+ CFI_STARTPROC()
vzeroupper
push rbx
+ CFI_PUSH(rbx)
push rbp
+ CFI_PUSH(rbp)
push r13
+ CFI_PUSH(r13)
push r14
+ CFI_PUSH(r14)
push r15
+ CFI_PUSH(r15)
sub rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(STACK_SIZE);
shl NUM_BLKS, 6 /* convert to bytes */
jz .Ldone_hash
@@ -487,14 +484,21 @@ _gcry_sha256_transform_amd64_avx:
xor eax, eax
add rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(-STACK_SIZE);
pop r15
+ CFI_POP(r15)
pop r14
+ CFI_POP(r14)
pop r13
+ CFI_POP(r13)
pop rbp
+ CFI_POP(rbp)
pop rbx
+ CFI_POP(rbx)
ret
+ CFI_ENDPROC()
.align 16
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index 5fc402cd..52be1a07 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -60,17 +60,7 @@
defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(USE_SHA256)
-#ifdef __PIC__
-# define ADD_RIP +rip
-#else
-# define ADD_RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.intel_syntax noprefix
@@ -314,17 +304,24 @@ a = TMP_
ELF(.type _gcry_sha256_transform_amd64_avx2,@function)
.align 32
_gcry_sha256_transform_amd64_avx2:
+ CFI_STARTPROC()
xor eax, eax
cmp rdx, 0
je .Lnowork
push rbx
+ CFI_PUSH(rbx)
push rbp
+ CFI_PUSH(rbp)
push r12
+ CFI_PUSH(r12)
push r13
+ CFI_PUSH(r13)
push r14
+ CFI_PUSH(r14)
push r15
+ CFI_PUSH(r15)
vzeroupper
@@ -333,9 +330,11 @@ _gcry_sha256_transform_amd64_avx2:
vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
mov rax, rsp
+ CFI_DEF_CFA_REGISTER(rax);
sub rsp, STACK_SIZE
and rsp, ~63
mov [rsp + _RSP], rax
+ CFI_CFA_ON_STACK(_RSP, 6 * 8)
shl NUM_BLKS, 6 /* convert to bytes */
lea NUM_BLKS, [NUM_BLKS + INP - 64] /* pointer to last block */
@@ -507,16 +506,24 @@ _gcry_sha256_transform_amd64_avx2:
xor eax, eax
mov rsp, [rsp + _RSP]
+ CFI_DEF_CFA_REGISTER(rsp)
pop r15
+ CFI_POP(r15)
pop r14
+ CFI_POP(r14)
pop r13
+ CFI_POP(r13)
pop r12
+ CFI_POP(r12)
pop rbp
+ CFI_POP(rbp)
pop rbx
+ CFI_POP(rbx)
.Lnowork:
ret
+ CFI_ENDPROC()
.align 64
.LK256:
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index ca5c9fd1..0fb94c1b 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -60,17 +60,7 @@
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA256)
-#ifdef __PIC__
-# define ADD_RIP +rip
-#else
-# define ADD_RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.intel_syntax noprefix
@@ -386,13 +376,20 @@ rotate_Xs
ELF(.type _gcry_sha256_transform_amd64_ssse3,@function;)
.align 16
_gcry_sha256_transform_amd64_ssse3:
+ CFI_STARTPROC()
push rbx
+ CFI_PUSH(rbx)
push rbp
+ CFI_PUSH(rbp)
push r13
+ CFI_PUSH(r13)
push r14
+ CFI_PUSH(r14)
push r15
+ CFI_PUSH(r15)
sub rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(STACK_SIZE);
shl NUM_BLKS, 6 /* convert to bytes */
jz .Ldone_hash
@@ -508,14 +505,21 @@ _gcry_sha256_transform_amd64_ssse3:
xor eax, eax
add rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(-STACK_SIZE);
pop r15
+ CFI_POP(r15)
pop r14
+ CFI_POP(r14)
pop r13
+ CFI_POP(r13)
pop rbp
+ CFI_POP(rbp)
pop rbx
+ CFI_POP(rbx)
ret
+ CFI_ENDPROC()
.align 16
diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S
index 534351e4..991fd639 100644
--- a/cipher/sha512-avx-amd64.S
+++ b/cipher/sha512-avx-amd64.S
@@ -46,17 +46,7 @@
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA512)
-#ifdef __PIC__
-# define ADD_RIP +rip
-#else
-# define ADD_RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.intel_syntax noprefix
@@ -269,6 +259,7 @@ frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size))
ELF(.type _gcry_sha512_transform_amd64_avx,@function;)
.align 16
_gcry_sha512_transform_amd64_avx:
+ CFI_STARTPROC()
xor eax, eax
cmp msglen, 0
@@ -278,6 +269,7 @@ _gcry_sha512_transform_amd64_avx:
/* Allocate Stack Space */
sub rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(frame_size);
/* Save GPRs */
mov [rsp + frame_GPRSAVE + 8 * 0], rbx
@@ -285,6 +277,11 @@ _gcry_sha512_transform_amd64_avx:
mov [rsp + frame_GPRSAVE + 8 * 2], r13
mov [rsp + frame_GPRSAVE + 8 * 3], r14
mov [rsp + frame_GPRSAVE + 8 * 4], r15
+ CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0);
+ CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1);
+ CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2);
+ CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3);
+ CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4);
.Lupdateblock:
@@ -351,6 +348,11 @@ _gcry_sha512_transform_amd64_avx:
mov r13, [rsp + frame_GPRSAVE + 8 * 2]
mov r14, [rsp + frame_GPRSAVE + 8 * 3]
mov r15, [rsp + frame_GPRSAVE + 8 * 4]
+ CFI_RESTORE(rbx)
+ CFI_RESTORE(r12)
+ CFI_RESTORE(r13)
+ CFI_RESTORE(r14)
+ CFI_RESTORE(r15)
vzeroall
@@ -365,9 +367,11 @@ _gcry_sha512_transform_amd64_avx:
/* Restore Stack Pointer */
add rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(-frame_size);
.Lnowork:
ret
+ CFI_ENDPROC()
/*
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S
index 32cfceb0..3b28ab6c 100644
--- a/cipher/sha512-avx2-bmi2-amd64.S
+++ b/cipher/sha512-avx2-bmi2-amd64.S
@@ -49,17 +49,7 @@
defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(USE_SHA512)
-#ifdef __PIC__
-# define ADD_RIP +rip
-#else
-# define ADD_RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.intel_syntax noprefix
@@ -352,6 +342,7 @@ y4 = r12
ELF(.type _gcry_sha512_transform_amd64_avx2,@function;)
.align 16
_gcry_sha512_transform_amd64_avx2:
+ CFI_STARTPROC()
xor eax, eax
cmp rdx, 0
@@ -361,9 +352,11 @@ _gcry_sha512_transform_amd64_avx2:
/* Allocate Stack Space */
mov rax, rsp
+ CFI_DEF_CFA_REGISTER(rax);
sub rsp, frame_size
and rsp, ~(0x40 - 1)
mov [rsp + frame_RSPSAVE], rax
+ CFI_CFA_ON_STACK(frame_RSPSAVE, 0)
/* Save GPRs */
mov [rsp + frame_GPRSAVE + 8 * 0], rbp
@@ -372,6 +365,12 @@ _gcry_sha512_transform_amd64_avx2:
mov [rsp + frame_GPRSAVE + 8 * 3], r13
mov [rsp + frame_GPRSAVE + 8 * 4], r14
mov [rsp + frame_GPRSAVE + 8 * 5], r15
+ CFI_REG_ON_STACK(rbp, frame_GPRSAVE + 8 * 0)
+ CFI_REG_ON_STACK(rbx, frame_GPRSAVE + 8 * 1)
+ CFI_REG_ON_STACK(r12, frame_GPRSAVE + 8 * 2)
+ CFI_REG_ON_STACK(r13, frame_GPRSAVE + 8 * 3)
+ CFI_REG_ON_STACK(r14, frame_GPRSAVE + 8 * 4)
+ CFI_REG_ON_STACK(r15, frame_GPRSAVE + 8 * 5)
mov [rsp + frame_NBLKS], NUM_BLKS
@@ -494,11 +493,20 @@ _gcry_sha512_transform_amd64_avx2:
mov r13, [rsp + frame_GPRSAVE + 8 * 3]
mov r14, [rsp + frame_GPRSAVE + 8 * 4]
mov r15, [rsp + frame_GPRSAVE + 8 * 5]
+ CFI_RESTORE(rbp)
+ CFI_RESTORE(rbx)
+ CFI_RESTORE(r12)
+ CFI_RESTORE(r13)
+ CFI_RESTORE(r14)
+ CFI_RESTORE(r15)
/* Restore Stack Pointer */
mov rsp, [rsp + frame_RSPSAVE]
+ CFI_DEF_CFA_REGISTER(rsp)
+
.Lnowork:
ret
+ CFI_ENDPROC()
/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
/*;; Binary Data */
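
In the Intel-syntax AVX2 SHA-2 transforms the original rsp is saved into a slot of the realigned frame rather than kept in a register, so the frame can no longer be described with simple register/offset rules once the temporary register is reused; the CFI_CFA_ON_STACK()/CFI_REG_ON_STACK() helpers from asm-common-amd64.h emit DWARF expressions for that case. A minimal sketch following the sha512-avx2 usage above, illustrative only and not part of the patch; judging from the two call sites, the second argument of CFI_CFA_ON_STACK() appears to account for registers pushed before rsp was saved (zero here), and the frame_* names are those of sha512-avx2-bmi2-amd64.S:

example_cfa_on_stack:
	CFI_STARTPROC()
	mov rax, rsp
	CFI_DEF_CFA_REGISTER(rax);		/* CFA via rax while rsp is being realigned */
	sub rsp, frame_size
	and rsp, ~(0x40 - 1)
	mov [rsp + frame_RSPSAVE], rax
	CFI_CFA_ON_STACK(frame_RSPSAVE, 0)	/* CFA is now derived from the saved-rsp slot */

	mov [rsp + frame_GPRSAVE + 8 * 0], rbx
	CFI_REG_ON_STACK(rbx, frame_GPRSAVE + 8 * 0)	/* rbx likewise recoverable from its slot */

	/* ... message schedule and rounds ... */

	mov rbx, [rsp + frame_GPRSAVE + 8 * 0]
	CFI_RESTORE(rbx)
	mov rsp, [rsp + frame_RSPSAVE]
	CFI_DEF_CFA_REGISTER(rsp)
	ret
	CFI_ENDPROC()
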
diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S
index 8e950e0e..39bfe362 100644
--- a/cipher/sha512-ssse3-amd64.S
+++ b/cipher/sha512-ssse3-amd64.S
@@ -49,17 +49,7 @@
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512)
-#ifdef __PIC__
-# define ADD_RIP +rip
-#else
-# define ADD_RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.intel_syntax noprefix
@@ -271,6 +261,7 @@ frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size))
ELF(.type _gcry_sha512_transform_amd64_ssse3,@function;)
.align 16
_gcry_sha512_transform_amd64_ssse3:
+ CFI_STARTPROC()
xor eax, eax
cmp msglen, 0
@@ -278,6 +269,7 @@ _gcry_sha512_transform_amd64_ssse3:
/* Allocate Stack Space */
sub rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(frame_size);
/* Save GPRs */
mov [rsp + frame_GPRSAVE + 8 * 0], rbx
@@ -285,6 +277,11 @@ _gcry_sha512_transform_amd64_ssse3:
mov [rsp + frame_GPRSAVE + 8 * 2], r13
mov [rsp + frame_GPRSAVE + 8 * 3], r14
mov [rsp + frame_GPRSAVE + 8 * 4], r15
+ CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0);
+ CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1);
+ CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2);
+ CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3);
+ CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4);
.Lupdateblock:
@@ -351,6 +348,11 @@ _gcry_sha512_transform_amd64_ssse3:
mov r13, [rsp + frame_GPRSAVE + 8 * 2]
mov r14, [rsp + frame_GPRSAVE + 8 * 3]
mov r15, [rsp + frame_GPRSAVE + 8 * 4]
+ CFI_RESTORE(rbx)
+ CFI_RESTORE(r12)
+ CFI_RESTORE(r13)
+ CFI_RESTORE(r14)
+ CFI_RESTORE(r15)
pxor xmm0, xmm0
pxor xmm1, xmm1
@@ -370,9 +372,11 @@ _gcry_sha512_transform_amd64_ssse3:
/* Restore Stack Pointer */
add rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(-frame_size);
.Lnowork:
ret
+ CFI_ENDPROC()
/*
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index 134d6401..3cb73431 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -171,12 +171,16 @@ _gcry_twofish_amd64_encrypt_block:
* %rsi: dst
* %rdx: src
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
subq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(3 * 8);
movq %rsi, (0 * 8)(%rsp);
movq %rbp, (1 * 8)(%rsp);
movq %rbx, (2 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 1 * 8);
+ CFI_REL_OFFSET(%rbx, 2 * 8);
movq %rdx, RX;
inpack(RX, 0, RAd, 0);
@@ -201,10 +205,14 @@ _gcry_twofish_amd64_encrypt_block:
movq (2 * 8)(%rsp), %rbx;
movq (1 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
addq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-3 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
.align 8
@@ -217,12 +225,16 @@ _gcry_twofish_amd64_decrypt_block:
* %rsi: dst
* %rdx: src
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
subq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(3 * 8);
movq %rsi, (0 * 8)(%rsp);
movq %rbp, (1 * 8)(%rsp);
movq %rbx, (2 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 1 * 8);
+ CFI_REL_OFFSET(%rbx, 2 * 8);
movq %rdx, RX;
inpack(RX, 0, RCd, 4);
@@ -247,10 +259,14 @@ _gcry_twofish_amd64_decrypt_block:
movq (2 * 8)(%rsp), %rbx;
movq (1 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
addq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-3 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
#undef CTX
@@ -480,6 +496,8 @@ __twofish_enc_blk3:
* output:
* RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three ciphertext blocks
*/
+ CFI_STARTPROC();
+
inpack_enc3();
encrypt_cycle3(RAB, RCD, 0);
@@ -494,6 +512,7 @@ __twofish_enc_blk3:
outunpack_enc3();
ret;
+ CFI_ENDPROC();
ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
.align 8
@@ -506,6 +525,8 @@ __twofish_dec_blk3:
* output:
* RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three plaintext blocks
*/
+ CFI_STARTPROC();
+
inpack_dec3();
decrypt_cycle3(RAB, RCD, 7);
@@ -520,6 +541,7 @@ __twofish_dec_blk3:
outunpack_dec3();
ret;
+ CFI_ENDPROC();
ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
.align 8
@@ -532,15 +554,23 @@ _gcry_twofish_amd64_ctr_enc:
* %rdx: src (3 blocks)
* %rcx: iv (big endian, 128bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
movq %rbp, (0 * 8)(%rsp);
movq %rbx, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
movq %r14, (4 * 8)(%rsp);
movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
movq %rsi, (6 * 8)(%rsp);
movq %rdx, (7 * 8)(%rsp);
@@ -601,10 +631,18 @@ _gcry_twofish_amd64_ctr_enc:
movq (3 * 8)(%rsp), %r13;
movq (4 * 8)(%rsp), %r14;
movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
.align 8
@@ -617,15 +655,23 @@ _gcry_twofish_amd64_cbc_dec:
* %rdx: src (3 blocks)
* %rcx: iv (128bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
subq $(9 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(9 * 8);
movq %rbp, (0 * 8)(%rsp);
movq %rbx, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
movq %r14, (4 * 8)(%rsp);
movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
movq %rsi, (6 * 8)(%rsp);
movq %rdx, (7 * 8)(%rsp);
@@ -670,10 +716,18 @@ _gcry_twofish_amd64_cbc_dec:
movq (3 * 8)(%rsp), %r13;
movq (4 * 8)(%rsp), %r14;
movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
addq $(9 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-9 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
.align 8
@@ -686,15 +740,23 @@ _gcry_twofish_amd64_cfb_dec:
* %rdx: src (3 blocks)
* %rcx: iv (128bit)
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_0_4
subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
movq %rbp, (0 * 8)(%rsp);
movq %rbx, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
movq %r14, (4 * 8)(%rsp);
movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
movq %rsi, (6 * 8)(%rsp);
movq %rdx, (7 * 8)(%rsp);
@@ -739,10 +801,18 @@ _gcry_twofish_amd64_cfb_dec:
movq (3 * 8)(%rsp), %r13;
movq (4 * 8)(%rsp), %r14;
movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
.align 8
@@ -757,15 +827,23 @@ _gcry_twofish_amd64_ocb_enc:
* %r8 : checksum
* %r9 : L pointers (void *L[3])
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_6
subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
movq %rbp, (0 * 8)(%rsp);
movq %rbx, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
movq %r14, (4 * 8)(%rsp);
movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
movq %rsi, (6 * 8)(%rsp);
movq %rdx, RX0;
@@ -849,10 +927,18 @@ _gcry_twofish_amd64_ocb_enc:
movq (3 * 8)(%rsp), %r13;
movq (4 * 8)(%rsp), %r14;
movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
.align 8
@@ -867,15 +953,23 @@ _gcry_twofish_amd64_ocb_dec:
* %r8 : checksum
* %r9 : L pointers (void *L[3])
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_6
subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
movq %rbp, (0 * 8)(%rsp);
movq %rbx, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
movq %r14, (4 * 8)(%rsp);
movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
movq %rsi, (6 * 8)(%rsp);
movq %r8, (7 * 8)(%rsp);
@@ -967,10 +1061,18 @@ _gcry_twofish_amd64_ocb_dec:
movq (3 * 8)(%rsp), %r13;
movq (4 * 8)(%rsp), %r14;
movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
.align 8
@@ -984,15 +1086,23 @@ _gcry_twofish_amd64_ocb_auth:
* %rcx: checksum
* %r8 : L pointers (void *L[3])
*/
+ CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_5
subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
movq %rbp, (0 * 8)(%rsp);
movq %rbx, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
movq %r14, (4 * 8)(%rsp);
movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
movq %rcx, (6 * 8)(%rsp);
movq %rsi, RX0;
@@ -1056,10 +1166,18 @@ _gcry_twofish_amd64_ocb_auth:
movq (3 * 8)(%rsp), %r13;
movq (4 * 8)(%rsp), %r14;
movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
EXIT_SYSV_FUNC
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;)
#endif /*USE_TWOFISH*/
diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S
index db6e2182..74cad355 100644
--- a/cipher/twofish-avx2-amd64.S
+++ b/cipher/twofish-avx2-amd64.S
@@ -24,17 +24,7 @@
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH) && \
defined(ENABLE_AVX2_SUPPORT)
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
-
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
+#include "asm-common-amd64.h"
.text
@@ -423,6 +413,7 @@ __twofish_enc_blk16:
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
* ciphertext blocks
*/
+ CFI_STARTPROC();
init_round_constants();
transpose4x4_16(RA, RB, RC, RD);
@@ -441,6 +432,7 @@ __twofish_enc_blk16:
transpose4x4_16(RA, RB, RC, RD);
ret;
+ CFI_ENDPROC();
ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;)
.align 8
@@ -454,6 +446,7 @@ __twofish_dec_blk16:
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
* ciphertext blocks
*/
+ CFI_STARTPROC();
init_round_constants();
transpose4x4_16(RA, RB, RC, RD);
@@ -472,6 +465,7 @@ __twofish_dec_blk16:
transpose4x4_16(RA, RB, RC, RD);
ret;
+ CFI_ENDPROC();
ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;)
#define inc_le128(x, minus_one, tmp) \
@@ -490,13 +484,14 @@ _gcry_twofish_avx2_ctr_enc:
* %rdx: src (16 blocks)
* %rcx: iv (big endian, 128bit)
*/
+ CFI_STARTPROC();
movq 8(%rcx), %rax;
bswapq %rax;
vzeroupper;
- vbroadcasti128 .Lbswap128_mask RIP, RTMP3;
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
vpcmpeqd RNOT, RNOT, RNOT;
vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
@@ -587,7 +582,8 @@ _gcry_twofish_avx2_ctr_enc:
vzeroall;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;)
.align 8
@@ -600,6 +596,7 @@ _gcry_twofish_avx2_cbc_dec:
* %rdx: src (16 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -638,7 +635,8 @@ _gcry_twofish_avx2_cbc_dec:
vzeroall;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;)
.align 8
@@ -651,6 +649,7 @@ _gcry_twofish_avx2_cfb_dec:
* %rdx: src (16 blocks)
* %rcx: iv
*/
+ CFI_STARTPROC();
vzeroupper;
@@ -691,7 +690,8 @@ _gcry_twofish_avx2_cfb_dec:
vzeroall;
- ret
+ ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;)
.align 8
@@ -707,15 +707,21 @@ _gcry_twofish_avx2_ocb_enc:
* %r8 : checksum
* %r9 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
vzeroupper;
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
vmovdqu (%rcx), RTMP0x;
vmovdqu (%r8), RTMP1x;
@@ -768,10 +774,15 @@ _gcry_twofish_avx2_ocb_enc:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __twofish_enc_blk16;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
vpxor (0 * 32)(%rsi), RA0, RA0;
vpxor (1 * 32)(%rsi), RB0, RB0;
@@ -794,6 +805,7 @@ _gcry_twofish_avx2_ocb_enc:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;)
.align 8
@@ -809,15 +821,21 @@ _gcry_twofish_avx2_ocb_dec:
* %r8 : checksum
* %r9 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
vzeroupper;
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
vmovdqu (%rcx), RTMP0x;
@@ -865,6 +883,10 @@ _gcry_twofish_avx2_ocb_dec:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __twofish_dec_blk16;
@@ -880,6 +902,7 @@ _gcry_twofish_avx2_ocb_dec:
vpxor (7 * 32)(%rsi), RD1, RD1;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
/* Checksum_i = Checksum_{i-1} xor P_i */
@@ -907,6 +930,7 @@ _gcry_twofish_avx2_ocb_dec:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;)
.align 8
@@ -921,15 +945,21 @@ _gcry_twofish_avx2_ocb_auth:
* %rcx: checksum
* %r8 : L pointers (void *L[16])
*/
+ CFI_STARTPROC();
vzeroupper;
subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
movq %r10, (0 * 8)(%rsp);
movq %r11, (1 * 8)(%rsp);
movq %r12, (2 * 8)(%rsp);
movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
vmovdqu (%rdx), RTMP0x;
@@ -975,6 +1005,10 @@ _gcry_twofish_avx2_ocb_auth:
movq (1 * 8)(%rsp), %r11;
movq (2 * 8)(%rsp), %r12;
movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
call __twofish_enc_blk16;
@@ -987,6 +1021,7 @@ _gcry_twofish_avx2_ocb_auth:
vpxor RA1, RC1, RA1;
addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
vpxor RA1, RA0, RTMP1;
@@ -998,6 +1033,7 @@ _gcry_twofish_avx2_ocb_auth:
vzeroall;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_twofish_avx2_ocb_auth,.-_gcry_twofish_avx2_ocb_auth;)
.align 16
diff --git a/cipher/whirlpool-sse2-amd64.S b/cipher/whirlpool-sse2-amd64.S
index e98b831c..5631dc56 100644
--- a/cipher/whirlpool-sse2-amd64.S
+++ b/cipher/whirlpool-sse2-amd64.S
@@ -23,17 +23,7 @@
#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_WHIRLPOOL)
-#ifdef __PIC__
-# define RIP %rip
-#else
-# define RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
.text
@@ -173,16 +163,24 @@ _gcry_whirlpool_transform_amd64:
* %rdx: nblks
* %rcx: look-up tables
*/
+ CFI_STARTPROC();
cmp $0, %rdx;
je .Lskip;
subq $STACK_MAX, %rsp;
+ CFI_ADJUST_CFA_OFFSET(STACK_MAX);
movq %rbp, STACK_RBP(%rsp);
movq %rbx, STACK_RBX(%rsp);
movq %r12, STACK_R12(%rsp);
movq %r13, STACK_R13(%rsp);
movq %r14, STACK_R14(%rsp);
movq %r15, STACK_R15(%rsp);
+ CFI_REL_OFFSET(%rbp, STACK_RBP);
+ CFI_REL_OFFSET(%rbx, STACK_RBX);
+ CFI_REL_OFFSET(%r12, STACK_R12);
+ CFI_REL_OFFSET(%r13, STACK_R13);
+ CFI_REL_OFFSET(%r14, STACK_R14);
+ CFI_REL_OFFSET(%r15, STACK_R15);
movq %rdx, STACK_NBLKS(%rsp);
movq %rdi, STACK_STATEP(%rsp);
@@ -332,10 +330,18 @@ _gcry_whirlpool_transform_amd64:
movq STACK_R13(%rsp), %r13;
movq STACK_R14(%rsp), %r14;
movq STACK_R15(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
addq $STACK_MAX, %rsp;
+ CFI_ADJUST_CFA_OFFSET(-STACK_MAX);
.Lskip:
movl $(STACK_MAX + 8), %eax;
ret;
+ CFI_ENDPROC();
ELF(.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;)
#endif