Diffstat (limited to 'cipher'): 29 files changed, 1109 insertions, 350 deletions
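The patch adds DWARF call frame information (CFI) unwind annotations to the AMD64 assembly implementations. The new CFI_* helper macros live in cipher/asm-common-amd64.h, which now also provides the shared ELF()/RIP helpers that several files previously defined locally; when HAVE_GCC_ASM_CFI_DIRECTIVES is not set, every macro expands to nothing, so the annotated code assembles exactly as before. As a minimal sketch of the pattern applied throughout the patch — the function name below is hypothetical, only the CFI_*/ELF macros come from asm-common-amd64.h:

	.globl example_func
	ELF(.type example_func,@function)
	example_func:
		CFI_STARTPROC()		/* open the unwind entry (FDE) for this function */
		pushq %rbx
		CFI_PUSH(%rbx)		/* CFA offset grows by 8; %rbx recorded as saved at (%rsp) */
		/* ... function body ... */
		popq %rbx
		CFI_POP(%rbx)		/* CFA offset shrinks by 8; %rbx marked as restored */
		ret
		CFI_ENDPROC()
	ELF(.size example_func,.-example_func)

Functions that switch to a frame pointer (movq %rsp, %rbp) additionally use CFI_DEF_CFA_REGISTER(%rbp) and CFI_LEAVE(), and routines that spill callee-saved registers into an aligned stack area (camellia OCB, chacha20-poly1305) describe those slots with CFI_REG_ON_STACK(), which emits a DW_CFA_expression through .cfi_escape.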
diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S index c08f3453..221dfeff 100644 --- a/cipher/arcfour-amd64.S +++ b/cipher/arcfour-amd64.S @@ -25,9 +25,12 @@ .globl _gcry_arcfour_amd64 ELF(.type _gcry_arcfour_amd64,@function) _gcry_arcfour_amd64: + CFI_STARTPROC() ENTER_SYSV_FUNC_PARAMS_0_4 push %rbp + CFI_PUSH(%rbp) push %rbx + CFI_PUSH(%rbx) mov %rdi, %rbp # key = ARG(key) mov %rsi, %rbx # rbx = ARG(len) mov %rdx, %rsi # in = ARG(in) @@ -92,9 +95,12 @@ _gcry_arcfour_amd64: movb %cl, (4*256)(%rbp) # key->y = y movb %dl, (4*256+4)(%rbp) # key->x = x pop %rbx + CFI_POP(%rbx) pop %rbp + CFI_POP(%rbp) EXIT_SYSV_FUNC ret + CFI_ENDPROC() .L__gcry_arcfour_amd64_end: ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64) diff --git a/cipher/asm-common-amd64.h b/cipher/asm-common-amd64.h index 7eb42649..9d4a028a 100644 --- a/cipher/asm-common-amd64.h +++ b/cipher/asm-common-amd64.h @@ -41,6 +41,12 @@ # define RIP #endif +#ifdef __PIC__ +# define ADD_RIP +rip +#else +# define ADD_RIP +#endif + #if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__) # define GET_EXTERN_POINTER(name, reg) movabsq $name, reg #else @@ -60,10 +66,101 @@ # endif #endif +#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES +/* CFI directives to emit DWARF stack unwinding information. */ +# define CFI_STARTPROC() .cfi_startproc +# define CFI_ENDPROC() .cfi_endproc +# define CFI_REMEMBER_STATE() .cfi_remember_state +# define CFI_RESTORE_STATE() .cfi_restore_state +# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off +# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_REGISTER(ro,rn) .cfi_register ro, rn +# define CFI_RESTORE(reg) .cfi_restore reg + +# define CFI_PUSH(reg) \ + CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0) +# define CFI_POP(reg) \ + CFI_ADJUST_CFA_OFFSET(-8); CFI_RESTORE(reg) +# define CFI_POP_TMP_REG() \ + CFI_ADJUST_CFA_OFFSET(-8); +# define CFI_LEAVE() \ + CFI_ADJUST_CFA_OFFSET(-8); CFI_DEF_CFA_REGISTER(%rsp) + +/* CFA expressions are used for pointing CFA and registers to + * %rsp relative offsets. */ +# define DW_REGNO_rax 0 +# define DW_REGNO_rdx 1 +# define DW_REGNO_rcx 2 +# define DW_REGNO_rbx 3 +# define DW_REGNO_rsi 4 +# define DW_REGNO_rdi 5 +# define DW_REGNO_rbp 6 +# define DW_REGNO_rsp 7 +# define DW_REGNO_r8 8 +# define DW_REGNO_r9 9 +# define DW_REGNO_r10 10 +# define DW_REGNO_r11 11 +# define DW_REGNO_r12 12 +# define DW_REGNO_r13 13 +# define DW_REGNO_r14 14 +# define DW_REGNO_r15 15 + +# define DW_REGNO(reg) DW_REGNO_ ## reg + +/* Fixed length encoding used for integers for now. 
*/ +# define DW_SLEB128_7BIT(value) \ + 0x00|((value) & 0x7f) +# define DW_SLEB128_28BIT(value) \ + 0x80|((value)&0x7f), \ + 0x80|(((value)>>7)&0x7f), \ + 0x80|(((value)>>14)&0x7f), \ + 0x00|(((value)>>21)&0x7f) + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \ + .cfi_escape \ + 0x0f, /* DW_CFA_def_cfa_expression */ \ + DW_SLEB128_7BIT(11), /* length */ \ + 0x77, /* DW_OP_breg7, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs), \ + 0x06, /* DW_OP_deref */ \ + 0x23, /* DW_OP_plus_constu */ \ + DW_SLEB128_28BIT((cfa_depth)+8) + +# define CFI_REG_ON_STACK(reg,rsp_offs) \ + .cfi_escape \ + 0x10, /* DW_CFA_expression */ \ + DW_SLEB128_7BIT(DW_REGNO(reg)), \ + DW_SLEB128_7BIT(5), /* length */ \ + 0x77, /* DW_OP_breg7, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs) + +#else +# define CFI_STARTPROC() +# define CFI_ENDPROC() +# define CFI_REMEMBER_STATE() +# define CFI_RESTORE_STATE() +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_REL_OFFSET(reg,off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_REGISTER(ro,rn) +# define CFI_RESTORE(reg) + +# define CFI_PUSH(reg) +# define CFI_POP(reg) +# define CFI_POP_TMP_REG() +# define CFI_LEAVE() + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) +# define CFI_REG_ON_STACK(reg,rsp_offs) +#endif + #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define ENTER_SYSV_FUNC_PARAMS_0_4 \ pushq %rdi; \ + CFI_PUSH(%rdi); \ pushq %rsi; \ + CFI_PUSH(%rsi); \ movq %rcx, %rdi; \ movq %rdx, %rsi; \ movq %r8, %rdx; \ @@ -79,7 +176,9 @@ # define EXIT_SYSV_FUNC \ popq %rsi; \ - popq %rdi; + CFI_POP(%rsi); \ + popq %rdi; \ + CFI_POP(%rdi); #else # define ENTER_SYSV_FUNC_PARAMS_0_4 # define ENTER_SYSV_FUNC_PARAMS_5 diff --git a/cipher/blake2b-amd64-avx2.S b/cipher/blake2b-amd64-avx2.S index 6bcc5652..08c816cd 100644 --- a/cipher/blake2b-amd64-avx2.S +++ b/cipher/blake2b-amd64-avx2.S @@ -207,6 +207,7 @@ _gcry_blake2b_transform_amd64_avx2: * %rsi: blks * %rdx: num_blks */ + CFI_STARTPROC(); vzeroupper; @@ -291,6 +292,7 @@ _gcry_blake2b_transform_amd64_avx2: xor %eax, %eax; vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_blake2b_transform_amd64_avx2, .-_gcry_blake2b_transform_amd64_avx2;) diff --git a/cipher/blake2s-amd64-avx.S b/cipher/blake2s-amd64-avx.S index f7312dbd..19837326 100644 --- a/cipher/blake2s-amd64-avx.S +++ b/cipher/blake2s-amd64-avx.S @@ -191,6 +191,7 @@ _gcry_blake2s_transform_amd64_avx: * %rsi: blks * %rdx: num_blks */ + CFI_STARTPROC(); vzeroupper; @@ -269,6 +270,7 @@ _gcry_blake2s_transform_amd64_avx: xor %eax, %eax; vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_blake2s_transform_amd64_avx, .-_gcry_blake2s_transform_amd64_avx;) diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S index 02d3b710..bdb361d7 100644 --- a/cipher/blowfish-amd64.S +++ b/cipher/blowfish-amd64.S @@ -133,7 +133,9 @@ __blowfish_enc_blk1: * output: * RX0: output plaintext block */ + CFI_STARTPROC(); movq %rbp, %r11; + CFI_REGISTER(%rbp, %r11); load_roundkey_enc(0); round_enc(2); @@ -147,8 +149,10 @@ __blowfish_enc_blk1: add_roundkey_enc(); movq %r11, %rbp; + CFI_RESTORE(%rbp) ret; + CFI_ENDPROC(); ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;) .align 8 @@ -161,6 +165,7 @@ _gcry_blowfish_amd64_do_encrypt: * %rsi: u32 *ret_xl * %rdx: u32 *ret_xr */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 movl (%rdx), RX0d; @@ -178,6 +183,7 @@ _gcry_blowfish_amd64_do_encrypt: EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;) .align 8 @@ -190,6 +196,7 @@ _gcry_blowfish_amd64_encrypt_block: * %rsi: dst * %rdx: src 
*/ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 movq %rsi, %r10; @@ -204,6 +211,7 @@ _gcry_blowfish_amd64_encrypt_block: EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;) .align 8 @@ -216,9 +224,11 @@ _gcry_blowfish_amd64_decrypt_block: * %rsi: dst * %rdx: src */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 movq %rbp, %r11; + CFI_REGISTER(%rbp, %r11); movq %rsi, %r10; movq %rdx, RIO; @@ -240,9 +250,11 @@ _gcry_blowfish_amd64_decrypt_block: write_block(); movq %r11, %rbp; + CFI_RESTORE(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;) /********************************************************************** @@ -340,6 +352,7 @@ __blowfish_enc_blk4: * output: * RX0,RX1,RX2,RX3: four output ciphertext blocks */ + CFI_STARTPROC(); preload_roundkey_enc(0); round_enc4(0); @@ -355,6 +368,7 @@ __blowfish_enc_blk4: outbswap_block4(); ret; + CFI_ENDPROC(); ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;) .align 8 @@ -367,6 +381,7 @@ __blowfish_dec_blk4: * output: * RX0,RX1,RX2,RX3: four output plaintext blocks */ + CFI_STARTPROC(); preload_roundkey_dec(17); inbswap_block4(); @@ -384,6 +399,7 @@ __blowfish_dec_blk4: outbswap_block4(); ret; + CFI_ENDPROC(); ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;) .align 8 @@ -396,12 +412,17 @@ _gcry_blowfish_amd64_ctr_enc: * %rdx: src (4 blocks) * %rcx: iv (big endian, 64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); /* %r11-%r13 are not used by __blowfish_enc_blk4 */ movq %rcx, %r13; /*iv*/ @@ -438,12 +459,17 @@ _gcry_blowfish_amd64_ctr_enc: movq RX3, 3 * 8(%r11); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;) .align 8 @@ -456,12 +482,17 @@ _gcry_blowfish_amd64_cbc_dec: * %rdx: src (4 blocks) * %rcx: iv (64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); /* %r11-%r13 are not used by __blowfish_dec_blk4 */ movq %rsi, %r11; /*dst*/ @@ -489,12 +520,17 @@ _gcry_blowfish_amd64_cbc_dec: movq RX3, 3 * 8(%r11); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;) .align 8 @@ -507,12 +543,17 @@ _gcry_blowfish_amd64_cfb_dec: * %rdx: src (4 blocks) * %rcx: iv (64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); /* %r11-%r13 are not used by __blowfish_enc_blk4 */ movq %rcx, %r13; /*iv*/ @@ -543,12 +584,17 @@ _gcry_blowfish_amd64_cfb_dec: movq RX3, 3 * 8(%r11); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;) #endif /*defined(USE_BLOWFISH)*/ diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S index 8022934f..e16d4f61 100644 --- a/cipher/camellia-aesni-avx-amd64.S +++ b/cipher/camellia-aesni-avx-amd64.S @@ -24,17 
+24,7 @@ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -75,10 +65,10 @@ /* \ * S-function with AES subbytes \ */ \ - vmovdqa .Linv_shift_row RIP, t4; \ - vbroadcastss .L0f0f0f0f RIP, t7; \ - vmovdqa .Lpre_tf_lo_s1 RIP, t0; \ - vmovdqa .Lpre_tf_hi_s1 RIP, t1; \ + vmovdqa .Linv_shift_row rRIP, t4; \ + vbroadcastss .L0f0f0f0f rRIP, t7; \ + vmovdqa .Lpre_tf_lo_s1 rRIP, t0; \ + vmovdqa .Lpre_tf_hi_s1 rRIP, t1; \ \ /* AES inverse shift rows */ \ vpshufb t4, x0, x0; \ @@ -91,8 +81,8 @@ vpshufb t4, x6, x6; \ \ /* prefilter sboxes 1, 2 and 3 */ \ - vmovdqa .Lpre_tf_lo_s4 RIP, t2; \ - vmovdqa .Lpre_tf_hi_s4 RIP, t3; \ + vmovdqa .Lpre_tf_lo_s4 rRIP, t2; \ + vmovdqa .Lpre_tf_hi_s4 rRIP, t3; \ filter_8bit(x0, t0, t1, t7, t6); \ filter_8bit(x7, t0, t1, t7, t6); \ filter_8bit(x1, t0, t1, t7, t6); \ @@ -106,8 +96,8 @@ filter_8bit(x6, t2, t3, t7, t6); \ \ /* AES subbytes + AES shift rows */ \ - vmovdqa .Lpost_tf_lo_s1 RIP, t0; \ - vmovdqa .Lpost_tf_hi_s1 RIP, t1; \ + vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \ + vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \ vaesenclast t4, x0, x0; \ vaesenclast t4, x7, x7; \ vaesenclast t4, x1, x1; \ @@ -118,16 +108,16 @@ vaesenclast t4, x6, x6; \ \ /* postfilter sboxes 1 and 4 */ \ - vmovdqa .Lpost_tf_lo_s3 RIP, t2; \ - vmovdqa .Lpost_tf_hi_s3 RIP, t3; \ + vmovdqa .Lpost_tf_lo_s3 rRIP, t2; \ + vmovdqa .Lpost_tf_hi_s3 rRIP, t3; \ filter_8bit(x0, t0, t1, t7, t6); \ filter_8bit(x7, t0, t1, t7, t6); \ filter_8bit(x3, t0, t1, t7, t6); \ filter_8bit(x6, t0, t1, t7, t6); \ \ /* postfilter sbox 3 */ \ - vmovdqa .Lpost_tf_lo_s2 RIP, t4; \ - vmovdqa .Lpost_tf_hi_s2 RIP, t5; \ + vmovdqa .Lpost_tf_lo_s2 rRIP, t4; \ + vmovdqa .Lpost_tf_hi_s2 rRIP, t5; \ filter_8bit(x2, t2, t3, t7, t6); \ filter_8bit(x5, t2, t3, t7, t6); \ \ @@ -442,7 +432,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vmovdqu .Lshufb_16x16b RIP, a0; \ + vmovdqu .Lshufb_16x16b rRIP, a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -508,7 +498,7 @@ vpunpcklwd t1, t3, e; \ vpunpckhwd t1, t3, f; \ \ - vmovdqa .Ltranspose_8x8_shuf RIP, t3; \ + vmovdqa .Ltranspose_8x8_shuf rRIP, t3; \ \ vpunpcklwd g, c, d; \ vpunpckhwd g, c, c; \ @@ -540,7 +530,7 @@ #define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ y6, y7, rio, key) \ vmovq key, x0; \ - vpshufb .Lpack_bswap RIP, x0, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ \ vpxor 0 * 16(rio), x0, y7; \ vpxor 1 * 16(rio), x0, y6; \ @@ -591,7 +581,7 @@ vmovdqu x0, stack_tmp0; \ \ vmovq key, x0; \ - vpshufb .Lpack_bswap RIP, x0, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ \ vpxor x0, y7, y7; \ vpxor x0, y6, y6; \ @@ -786,6 +776,7 @@ __camellia_enc_blk16: * %xmm0..%xmm15: 16 encrypted blocks, order swapped: * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 */ + CFI_STARTPROC(); leaq 8 * 16(%rax), %rcx; @@ -859,6 +850,7 @@ __camellia_enc_blk16: %xmm15, %rax, %rcx, 24); jmp .Lenc_done; + CFI_ENDPROC(); ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;) .align 8 @@ -874,6 +866,7 @@ __camellia_dec_blk16: * %xmm0..%xmm15: 16 plaintext blocks, order swapped: * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 */ + CFI_STARTPROC(); leaq 8 * 16(%rax), %rcx; @@ -944,6 +937,7 @@ 
__camellia_dec_blk16: ((key_table + (24) * 8) + 4)(CTX)); jmp .Ldec_max24; + CFI_ENDPROC(); ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;) #define inc_le128(x, minus_one, tmp) \ @@ -963,9 +957,12 @@ _gcry_camellia_aesni_avx_ctr_enc: * %rdx: src (16 blocks) * %rcx: iv (big endian, 128bit) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -973,7 +970,7 @@ _gcry_camellia_aesni_avx_ctr_enc: andq $~31, %rsp; movq %rsp, %rax; - vmovdqa .Lbswap128_mask RIP, %xmm14; + vmovdqa .Lbswap128_mask rRIP, %xmm14; /* load IV and byteswap */ vmovdqu (%rcx), %xmm15; @@ -1018,12 +1015,12 @@ _gcry_camellia_aesni_avx_ctr_enc: vmovdqa %xmm0, %xmm13; vpshufb %xmm14, %xmm0, %xmm0; inc_le128(%xmm13, %xmm15, %xmm14); - vpshufb .Lbswap128_mask RIP, %xmm13, %xmm13; /* le => be */ + vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; /* le => be */ vmovdqu %xmm13, (%rcx); /* inpack16_pre: */ vmovq (key_table)(CTX), %xmm15; - vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; vpxor %xmm0, %xmm15, %xmm0; vpxor %xmm1, %xmm15, %xmm1; vpxor %xmm2, %xmm15, %xmm2; @@ -1067,7 +1064,9 @@ _gcry_camellia_aesni_avx_ctr_enc: vzeroall; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;) .align 8 @@ -1081,9 +1080,12 @@ _gcry_camellia_aesni_avx_cbc_dec: * %rdx: src (16 blocks) * %rcx: iv */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1135,7 +1137,9 @@ _gcry_camellia_aesni_avx_cbc_dec: vzeroall; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;) .align 8 @@ -1149,9 +1153,12 @@ _gcry_camellia_aesni_avx_cfb_dec: * %rdx: src (16 blocks) * %rcx: iv */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1161,7 +1168,7 @@ _gcry_camellia_aesni_avx_cfb_dec: /* inpack16_pre: */ vmovq (key_table)(CTX), %xmm0; - vpshufb .Lpack_bswap RIP, %xmm0, %xmm0; + vpshufb .Lpack_bswap rRIP, %xmm0, %xmm0; vpxor (%rcx), %xmm0, %xmm15; vmovdqu 15 * 16(%rdx), %xmm1; vmovdqu %xmm1, (%rcx); /* store new IV */ @@ -1207,7 +1214,9 @@ _gcry_camellia_aesni_avx_cfb_dec: vzeroall; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;) .align 8 @@ -1223,9 +1232,12 @@ _gcry_camellia_aesni_avx_ocb_enc: * %r8 : checksum * %r9 : L pointers (void *L[16]) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1233,10 +1245,14 @@ _gcry_camellia_aesni_avx_ocb_enc: andq $~31, %rsp; movq %rsp, %rax; - movq %r10, (16 * 16 + 0 * 8)(%rax); - movq %r11, (16 * 16 + 1 * 8)(%rax); - movq %r12, (16 * 16 + 2 * 8)(%rax); - movq %r13, (16 * 16 + 3 * 8)(%rax); + movq %r10, (16 * 16 + 0 * 8)(%rsp); + movq %r11, (16 * 16 + 1 * 8)(%rsp); + movq %r12, (16 * 16 + 2 * 8)(%rsp); + movq %r13, (16 * 16 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8); vmovdqu (%rcx), %xmm14; vmovdqu (%r8), %xmm15; @@ -1292,7 +1308,7 @@ _gcry_camellia_aesni_avx_ocb_enc: /* inpack16_pre: */ vmovq (key_table)(CTX), %xmm15; - vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; vpxor %xmm0, %xmm15, %xmm0; vpxor %xmm1, %xmm15, %xmm1; vpxor %xmm2, %xmm15, %xmm2; 
@@ -1335,13 +1351,19 @@ _gcry_camellia_aesni_avx_ocb_enc: vzeroall; - movq (16 * 16 + 0 * 8)(%rax), %r10; - movq (16 * 16 + 1 * 8)(%rax), %r11; - movq (16 * 16 + 2 * 8)(%rax), %r12; - movq (16 * 16 + 3 * 8)(%rax), %r13; + movq (16 * 16 + 0 * 8)(%rsp), %r10; + movq (16 * 16 + 1 * 8)(%rsp), %r11; + movq (16 * 16 + 2 * 8)(%rsp), %r12; + movq (16 * 16 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;) .align 8 @@ -1357,9 +1379,12 @@ _gcry_camellia_aesni_avx_ocb_dec: * %r8 : checksum * %r9 : L pointers (void *L[16]) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1367,10 +1392,14 @@ _gcry_camellia_aesni_avx_ocb_dec: andq $~31, %rsp; movq %rsp, %rax; - movq %r10, (16 * 16 + 0 * 8)(%rax); - movq %r11, (16 * 16 + 1 * 8)(%rax); - movq %r12, (16 * 16 + 2 * 8)(%rax); - movq %r13, (16 * 16 + 3 * 8)(%rax); + movq %r10, (16 * 16 + 0 * 8)(%rsp); + movq %r11, (16 * 16 + 1 * 8)(%rsp); + movq %r12, (16 * 16 + 2 * 8)(%rsp); + movq %r13, (16 * 16 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8); vmovdqu (%rcx), %xmm15; @@ -1428,7 +1457,7 @@ _gcry_camellia_aesni_avx_ocb_dec: /* inpack16_pre: */ vmovq (key_table)(CTX, %r8, 8), %xmm15; - vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; vpxor %xmm0, %xmm15, %xmm0; vpxor %xmm1, %xmm15, %xmm1; vpxor %xmm2, %xmm15, %xmm2; @@ -1493,13 +1522,19 @@ _gcry_camellia_aesni_avx_ocb_dec: vzeroall; - movq (16 * 16 + 0 * 8)(%rax), %r10; - movq (16 * 16 + 1 * 8)(%rax), %r11; - movq (16 * 16 + 2 * 8)(%rax), %r12; - movq (16 * 16 + 3 * 8)(%rax), %r13; + movq (16 * 16 + 0 * 8)(%rsp), %r10; + movq (16 * 16 + 1 * 8)(%rsp), %r11; + movq (16 * 16 + 2 * 8)(%rsp), %r12; + movq (16 * 16 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;) .align 8 @@ -1514,9 +1549,12 @@ _gcry_camellia_aesni_avx_ocb_auth: * %rcx: checksum * %r8 : L pointers (void *L[16]) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1524,10 +1562,14 @@ _gcry_camellia_aesni_avx_ocb_auth: andq $~31, %rsp; movq %rsp, %rax; - movq %r10, (16 * 16 + 0 * 8)(%rax); - movq %r11, (16 * 16 + 1 * 8)(%rax); - movq %r12, (16 * 16 + 2 * 8)(%rax); - movq %r13, (16 * 16 + 3 * 8)(%rax); + movq %r10, (16 * 16 + 0 * 8)(%rsp); + movq %r11, (16 * 16 + 1 * 8)(%rsp); + movq %r12, (16 * 16 + 2 * 8)(%rsp); + movq %r13, (16 * 16 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8); vmovdqu (%rdx), %xmm15; @@ -1580,7 +1622,7 @@ _gcry_camellia_aesni_avx_ocb_auth: /* inpack16_pre: */ vmovq (key_table)(CTX), %xmm15; - vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; vpxor %xmm0, %xmm15, %xmm0; vpxor %xmm1, %xmm15, %xmm1; vpxor %xmm2, %xmm15, %xmm2; @@ -1623,13 +1665,19 @@ _gcry_camellia_aesni_avx_ocb_auth: vzeroall; - movq (16 * 16 + 0 * 8)(%rax), %r10; - movq (16 * 16 + 1 * 8)(%rax), %r11; - movq (16 * 16 + 2 * 8)(%rax), %r12; - movq 
(16 * 16 + 3 * 8)(%rax), %r13; + movq (16 * 16 + 0 * 8)(%rsp), %r10; + movq (16 * 16 + 1 * 8)(%rsp), %r11; + movq (16 * 16 + 2 * 8)(%rsp), %r12; + movq (16 * 16 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;) /* @@ -1657,8 +1705,8 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth; vpand sbox4mask, t0, t0; \ vpor t0, x, x; \ \ - vmovdqa .Lpost_tf_lo_s1 RIP, t0; \ - vmovdqa .Lpost_tf_hi_s1 RIP, t1; \ + vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \ + vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \ \ /* prefilter sboxes */ \ filter_8bit(x, pre_s1lo_mask, pre_s1hi_mask, _0f0f0f0fmask, t2); \ @@ -1672,18 +1720,18 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth; /* output rotation for sbox2 (<<< 1) */ \ /* output rotation for sbox3 (>>> 1) */ \ vpshufb inv_shift_row, x, t1; \ - vpshufb .Lsp0044440444044404mask RIP, x, t4; \ - vpshufb .Lsp1110111010011110mask RIP, x, x; \ + vpshufb .Lsp0044440444044404mask rRIP, x, t4; \ + vpshufb .Lsp1110111010011110mask rRIP, x, x; \ vpaddb t1, t1, t2; \ vpsrlw $7, t1, t0; \ vpsllw $7, t1, t3; \ vpor t0, t2, t0; \ vpsrlw $1, t1, t1; \ - vpshufb .Lsp0222022222000222mask RIP, t0, t0; \ + vpshufb .Lsp0222022222000222mask rRIP, t0, t0; \ vpor t1, t3, t1; \ \ vpxor x, t4, t4; \ - vpshufb .Lsp3033303303303033mask RIP, t1, t1; \ + vpshufb .Lsp3033303303303033mask rRIP, t1, t1; \ vpxor t4, t0, t0; \ vpxor t1, t0, t0; \ vpsrldq $8, t0, x; \ @@ -1741,17 +1789,19 @@ __camellia_avx_setup128: * %rdi: ctx, CTX; subkey storage at key_table(CTX) * %xmm0: key */ + CFI_STARTPROC(); + #define cmll_sub(n, ctx) (key_table+((n)*8))(ctx) #define KL128 %xmm0 #define KA128 %xmm2 - vpshufb .Lbswap128_mask RIP, KL128, KL128; + vpshufb .Lbswap128_mask rRIP, KL128, KL128; - vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11; - vmovq .Lsbox4_input_mask RIP, %xmm12; - vbroadcastss .L0f0f0f0f RIP, %xmm13; - vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14; - vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15; + vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11; + vmovq .Lsbox4_input_mask rRIP, %xmm12; + vbroadcastss .L0f0f0f0f rRIP, %xmm13; + vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14; + vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15; /* * Generate KA @@ -1763,18 +1813,18 @@ __camellia_avx_setup128: camellia_f(%xmm2, %xmm4, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP); vpxor %xmm4, %xmm3, %xmm3; camellia_f(%xmm3, %xmm2, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP); camellia_f(%xmm2, %xmm3, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP); vpxor %xmm4, %xmm3, %xmm3; camellia_f(%xmm3, %xmm4, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP); vpslldq $8, %xmm3, %xmm3; vpxor %xmm4, %xmm2, %xmm2; @@ -2076,6 +2126,7 @@ __camellia_avx_setup128: vzeroall; ret; + CFI_ENDPROC(); ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;) .align 8 @@ -2086,19 +2137,21 @@ __camellia_avx_setup256: * %rdi: ctx, CTX; subkey storage at key_table(CTX) * %xmm0 & %xmm1: key */ + CFI_STARTPROC(); + #define KL128 %xmm0 #define KR128 
%xmm1 #define KA128 %xmm2 #define KB128 %xmm3 - vpshufb .Lbswap128_mask RIP, KL128, KL128; - vpshufb .Lbswap128_mask RIP, KR128, KR128; + vpshufb .Lbswap128_mask rRIP, KL128, KL128; + vpshufb .Lbswap128_mask rRIP, KR128, KR128; - vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11; - vmovq .Lsbox4_input_mask RIP, %xmm12; - vbroadcastss .L0f0f0f0f RIP, %xmm13; - vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14; - vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15; + vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11; + vmovq .Lsbox4_input_mask rRIP, %xmm12; + vbroadcastss .L0f0f0f0f rRIP, %xmm13; + vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14; + vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15; /* * Generate KA @@ -2111,20 +2164,20 @@ __camellia_avx_setup256: camellia_f(%xmm2, %xmm4, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP); vpxor %xmm4, %xmm3, %xmm3; camellia_f(%xmm3, %xmm2, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP); vpxor %xmm6, %xmm2, %xmm2; camellia_f(%xmm2, %xmm3, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP); vpxor %xmm4, %xmm3, %xmm3; vpxor KR128, %xmm3, %xmm3; camellia_f(%xmm3, %xmm4, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP); vpslldq $8, %xmm3, %xmm3; vpxor %xmm4, %xmm2, %xmm2; @@ -2142,12 +2195,12 @@ __camellia_avx_setup256: camellia_f(%xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 rRIP); vpxor %xmm5, %xmm3, %xmm3; camellia_f(%xmm3, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, - %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 RIP); + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 rRIP); vpslldq $8, %xmm3, %xmm3; vpxor %xmm5, %xmm4, %xmm4; vpsrldq $8, %xmm3, %xmm3; @@ -2553,6 +2606,7 @@ __camellia_avx_setup256: vzeroall; ret; + CFI_ENDPROC(); ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;) .align 8 @@ -2565,6 +2619,7 @@ _gcry_camellia_aesni_avx_keygen: * %rsi: key * %rdx: keylen */ + CFI_STARTPROC(); vzeroupper; @@ -2585,6 +2640,7 @@ _gcry_camellia_aesni_avx_keygen: vpor %xmm2, %xmm1, %xmm1; jmp __camellia_avx_setup256; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;) #endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/ diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S index 897e4aee..cc01c774 100644 --- a/cipher/camellia-aesni-avx2-amd64.S +++ b/cipher/camellia-aesni-avx2-amd64.S @@ -24,17 +24,7 @@ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -92,12 +82,12 @@ /* \ * S-function with AES subbytes \ */ \ - vbroadcasti128 .Linv_shift_row RIP, t4; \ - vpbroadcastd .L0f0f0f0f RIP, t7; \ - vbroadcasti128 .Lpre_tf_lo_s1 RIP, t5; \ - vbroadcasti128 .Lpre_tf_hi_s1 RIP, t6; \ - vbroadcasti128 .Lpre_tf_lo_s4 RIP, t2; \ - vbroadcasti128 .Lpre_tf_hi_s4 RIP, t3; \ + vbroadcasti128 .Linv_shift_row rRIP, t4; \ + vpbroadcastd .L0f0f0f0f rRIP, t7; \ + vbroadcasti128 .Lpre_tf_lo_s1 rRIP, t5; \ + vbroadcasti128 .Lpre_tf_hi_s1 rRIP, t6; \ + vbroadcasti128 .Lpre_tf_lo_s4 rRIP, t2; \ + vbroadcasti128 .Lpre_tf_hi_s4 rRIP, t3; \ \ /* AES inverse shift rows */ \ vpshufb t4, x0, x0; \ @@ -143,8 +133,8 @@ vinserti128 $1, t2##_x, x6, x6; \ vextracti128 $1, x1, t3##_x; \ vextracti128 $1, x4, t2##_x; \ - vbroadcasti128 .Lpost_tf_lo_s1 RIP, t0; \ - vbroadcasti128 .Lpost_tf_hi_s1 RIP, t1; \ + vbroadcasti128 .Lpost_tf_lo_s1 rRIP, t0; \ + vbroadcasti128 .Lpost_tf_hi_s1 rRIP, t1; \ vaesenclast t4##_x, x2##_x, x2##_x; \ vaesenclast t4##_x, t6##_x, t6##_x; \ vaesenclast t4##_x, x5##_x, x5##_x; \ @@ -159,16 +149,16 @@ vinserti128 $1, t2##_x, x4, x4; \ \ /* postfilter sboxes 1 and 4 */ \ - vbroadcasti128 .Lpost_tf_lo_s3 RIP, t2; \ - vbroadcasti128 .Lpost_tf_hi_s3 RIP, t3; \ + vbroadcasti128 .Lpost_tf_lo_s3 rRIP, t2; \ + vbroadcasti128 .Lpost_tf_hi_s3 rRIP, t3; \ filter_8bit(x0, t0, t1, t7, t4); \ filter_8bit(x7, t0, t1, t7, t4); \ filter_8bit(x3, t0, t1, t7, t6); \ filter_8bit(x6, t0, t1, t7, t6); \ \ /* postfilter sbox 3 */ \ - vbroadcasti128 .Lpost_tf_lo_s2 RIP, t4; \ - vbroadcasti128 .Lpost_tf_hi_s2 RIP, t5; \ + vbroadcasti128 .Lpost_tf_lo_s2 rRIP, t4; \ + vbroadcasti128 .Lpost_tf_hi_s2 rRIP, t5; \ filter_8bit(x2, t2, t3, t7, t6); \ filter_8bit(x5, t2, t3, t7, t6); \ \ @@ -485,7 +475,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vbroadcasti128 .Lshufb_16x16b RIP, a0; \ + vbroadcasti128 .Lshufb_16x16b rRIP, a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -524,7 +514,7 @@ #define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ y6, y7, rio, key) \ vpbroadcastq key, x0; \ - vpshufb .Lpack_bswap RIP, x0, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ \ vpxor 0 * 32(rio), x0, y7; \ vpxor 1 * 32(rio), x0, y6; \ @@ -575,7 +565,7 @@ vmovdqu x0, stack_tmp0; \ \ vpbroadcastq key, x0; \ - vpshufb .Lpack_bswap RIP, x0, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ \ vpxor x0, y7, y7; \ vpxor x0, y6, y6; \ @@ -765,6 +755,7 @@ __camellia_enc_blk32: * %ymm0..%ymm15: 32 encrypted blocks, order swapped: * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 */ + CFI_STARTPROC(); leaq 8 * 32(%rax), %rcx; @@ -838,6 +829,7 @@ __camellia_enc_blk32: %ymm15, %rax, %rcx, 24); jmp .Lenc_done; + CFI_ENDPROC(); ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;) .align 8 @@ -853,6 +845,7 @@ __camellia_dec_blk32: * %ymm0..%ymm15: 16 plaintext blocks, order swapped: * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 */ + CFI_STARTPROC(); leaq 8 * 32(%rax), %rcx; @@ -923,6 +916,7 @@ __camellia_dec_blk32: ((key_table + (24) * 8) + 4)(CTX)); jmp .Ldec_max24; + CFI_ENDPROC(); ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;) #define inc_le128(x, minus_one, tmp) \ @@ -942,9 +936,12 @@ _gcry_camellia_aesni_avx2_ctr_enc: * %rdx: src (32 blocks) * %rcx: iv (big endian, 128bit) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); movq 8(%rcx), %r11; bswapq %r11; @@ 
-960,10 +957,10 @@ _gcry_camellia_aesni_avx2_ctr_enc: /* load IV and byteswap */ vmovdqu (%rcx), %xmm0; - vpshufb .Lbswap128_mask RIP, %xmm0, %xmm0; + vpshufb .Lbswap128_mask rRIP, %xmm0, %xmm0; vmovdqa %xmm0, %xmm1; inc_le128(%xmm0, %xmm15, %xmm14); - vbroadcasti128 .Lbswap128_mask RIP, %ymm14; + vbroadcasti128 .Lbswap128_mask rRIP, %ymm14; vinserti128 $1, %xmm0, %ymm1, %ymm0; vpshufb %ymm14, %ymm0, %ymm13; vmovdqu %ymm13, 15 * 32(%rax); @@ -1064,14 +1061,14 @@ _gcry_camellia_aesni_avx2_ctr_enc: vextracti128 $1, %ymm0, %xmm13; vpshufb %ymm14, %ymm0, %ymm0; inc_le128(%xmm13, %xmm15, %xmm14); - vpshufb .Lbswap128_mask RIP, %xmm13, %xmm13; + vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; vmovdqu %xmm13, (%rcx); .align 4 .Lload_ctr_done: /* inpack16_pre: */ vpbroadcastq (key_table)(CTX), %ymm15; - vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; vpxor %ymm0, %ymm15, %ymm0; vpxor %ymm1, %ymm15, %ymm1; vpxor %ymm2, %ymm15, %ymm2; @@ -1116,7 +1113,9 @@ _gcry_camellia_aesni_avx2_ctr_enc: vzeroall; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;) .align 8 @@ -1130,9 +1129,12 @@ _gcry_camellia_aesni_avx2_cbc_dec: * %rdx: src (32 blocks) * %rcx: iv */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1188,7 +1190,9 @@ _gcry_camellia_aesni_avx2_cbc_dec: vzeroall; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;) .align 8 @@ -1202,9 +1206,12 @@ _gcry_camellia_aesni_avx2_cfb_dec: * %rdx: src (32 blocks) * %rcx: iv */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1214,7 +1221,7 @@ _gcry_camellia_aesni_avx2_cfb_dec: /* inpack16_pre: */ vpbroadcastq (key_table)(CTX), %ymm0; - vpshufb .Lpack_bswap RIP, %ymm0, %ymm0; + vpshufb .Lpack_bswap rRIP, %ymm0, %ymm0; vmovdqu (%rcx), %xmm15; vinserti128 $1, (%rdx), %ymm15, %ymm15; vpxor %ymm15, %ymm0, %ymm15; @@ -1262,7 +1269,9 @@ _gcry_camellia_aesni_avx2_cfb_dec: vzeroall; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;) .align 8 @@ -1278,9 +1287,12 @@ _gcry_camellia_aesni_avx2_ocb_enc: * %r8 : checksum * %r9 : L pointers (void *L[32]) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1288,10 +1300,14 @@ _gcry_camellia_aesni_avx2_ocb_enc: andq $~63, %rsp; movq %rsp, %rax; - movq %r10, (16 * 32 + 0 * 8)(%rax); - movq %r11, (16 * 32 + 1 * 8)(%rax); - movq %r12, (16 * 32 + 2 * 8)(%rax); - movq %r13, (16 * 32 + 3 * 8)(%rax); + movq %r10, (16 * 32 + 0 * 8)(%rsp); + movq %r11, (16 * 32 + 1 * 8)(%rsp); + movq %r12, (16 * 32 + 2 * 8)(%rsp); + movq %r13, (16 * 32 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8); vmovdqu (%rcx), %xmm14; vmovdqu (%r8), %xmm13; @@ -1369,7 +1385,7 @@ _gcry_camellia_aesni_avx2_ocb_enc: /* inpack16_pre: */ vpbroadcastq (key_table)(CTX), %ymm15; - vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; vpxor %ymm0, %ymm15, %ymm0; vpxor %ymm1, %ymm15, %ymm1; vpxor %ymm2, %ymm15, %ymm2; @@ -1412,13 +1428,19 @@ _gcry_camellia_aesni_avx2_ocb_enc: vzeroall; - movq (16 * 32 + 0 * 8)(%rax), %r10; - movq (16 * 32 + 1 * 8)(%rax), %r11; - movq 
(16 * 32 + 2 * 8)(%rax), %r12; - movq (16 * 32 + 3 * 8)(%rax), %r13; + movq (16 * 32 + 0 * 8)(%rsp), %r10; + movq (16 * 32 + 1 * 8)(%rsp), %r11; + movq (16 * 32 + 2 * 8)(%rsp), %r12; + movq (16 * 32 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx2_ocb_enc,.-_gcry_camellia_aesni_avx2_ocb_enc;) .align 8 @@ -1434,9 +1456,12 @@ _gcry_camellia_aesni_avx2_ocb_dec: * %r8 : checksum * %r9 : L pointers (void *L[32]) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1444,10 +1469,14 @@ _gcry_camellia_aesni_avx2_ocb_dec: andq $~63, %rsp; movq %rsp, %rax; - movq %r10, (16 * 32 + 0 * 8)(%rax); - movq %r11, (16 * 32 + 1 * 8)(%rax); - movq %r12, (16 * 32 + 2 * 8)(%rax); - movq %r13, (16 * 32 + 3 * 8)(%rax); + movq %r10, (16 * 32 + 0 * 8)(%rsp); + movq %r11, (16 * 32 + 1 * 8)(%rsp); + movq %r12, (16 * 32 + 2 * 8)(%rsp); + movq %r13, (16 * 32 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8); vmovdqu (%rcx), %xmm14; @@ -1525,7 +1554,7 @@ _gcry_camellia_aesni_avx2_ocb_dec: /* inpack16_pre: */ vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15; - vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; vpxor %ymm0, %ymm15, %ymm0; vpxor %ymm1, %ymm15, %ymm1; vpxor %ymm2, %ymm15, %ymm2; @@ -1596,13 +1625,19 @@ _gcry_camellia_aesni_avx2_ocb_dec: vzeroall; - movq (16 * 32 + 0 * 8)(%rax), %r10; - movq (16 * 32 + 1 * 8)(%rax), %r11; - movq (16 * 32 + 2 * 8)(%rax), %r12; - movq (16 * 32 + 3 * 8)(%rax), %r13; + movq (16 * 32 + 0 * 8)(%rsp), %r10; + movq (16 * 32 + 1 * 8)(%rsp), %r11; + movq (16 * 32 + 2 * 8)(%rsp), %r12; + movq (16 * 32 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx2_ocb_dec,.-_gcry_camellia_aesni_avx2_ocb_dec;) .align 8 @@ -1617,9 +1652,12 @@ _gcry_camellia_aesni_avx2_ocb_auth: * %rcx: checksum * %r8 : L pointers (void *L[16]) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -1627,10 +1665,14 @@ _gcry_camellia_aesni_avx2_ocb_auth: andq $~63, %rsp; movq %rsp, %rax; - movq %r10, (16 * 32 + 0 * 8)(%rax); - movq %r11, (16 * 32 + 1 * 8)(%rax); - movq %r12, (16 * 32 + 2 * 8)(%rax); - movq %r13, (16 * 32 + 3 * 8)(%rax); + movq %r10, (16 * 32 + 0 * 8)(%rsp); + movq %r11, (16 * 32 + 1 * 8)(%rsp); + movq %r12, (16 * 32 + 2 * 8)(%rsp); + movq %r13, (16 * 32 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8); vmovdqu (%rdx), %xmm14; @@ -1703,7 +1745,7 @@ _gcry_camellia_aesni_avx2_ocb_auth: /* inpack16_pre: */ vpbroadcastq (key_table)(CTX), %ymm15; - vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; vpxor %ymm0, %ymm15, %ymm0; vpxor %ymm1, %ymm15, %ymm1; vpxor %ymm2, %ymm15, %ymm2; @@ -1749,13 +1791,19 @@ _gcry_camellia_aesni_avx2_ocb_auth: vzeroall; - movq (16 * 32 + 0 * 8)(%rax), %r10; - movq (16 * 32 + 1 * 8)(%rax), %r11; - movq (16 * 32 + 2 * 8)(%rax), %r12; - movq (16 * 32 + 3 * 8)(%rax), %r13; + movq (16 * 32 + 0 * 8)(%rsp), %r10; + movq (16 * 32 + 1 * 8)(%rsp), %r11; + movq (16 * 32 + 2 
* 8)(%rsp), %r12; + movq (16 * 32 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_camellia_aesni_avx2_ocb_auth,.-_gcry_camellia_aesni_avx2_ocb_auth;) #endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/ diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S index 1a1d43fd..82f67890 100644 --- a/cipher/cast5-amd64.S +++ b/cipher/cast5-amd64.S @@ -183,10 +183,13 @@ _gcry_cast5_amd64_encrypt_block: * %rsi: dst * %rdx: src */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); movq %rsi, %r10; @@ -211,10 +214,13 @@ _gcry_cast5_amd64_encrypt_block: write_block(); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;) .align 8 @@ -227,10 +233,13 @@ _gcry_cast5_amd64_decrypt_block: * %rsi: dst * %rdx: src */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); movq %rsi, %r10; @@ -255,10 +264,13 @@ _gcry_cast5_amd64_decrypt_block: write_block(); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;) /********************************************************************** @@ -371,6 +383,7 @@ __cast5_enc_blk4: * output: * RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks */ + CFI_STARTPROC(); GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); get_round_km(0, RKM0d); @@ -387,6 +400,7 @@ __cast5_enc_blk4: outbswap_block4(RLR0, RLR1, RLR2, RLR3); ret; + CFI_ENDPROC(); ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;) .align 8 @@ -399,6 +413,7 @@ __cast5_dec_blk4: * output: * RLR0,RLR1,RLR2,RLR3: four output plaintext blocks */ + CFI_STARTPROC(); GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); inbswap_block4(RLR0, RLR1, RLR2, RLR3); @@ -416,6 +431,7 @@ __cast5_dec_blk4: round_dec_last4(1, F4_2, F4_1); outbswap_block4(RLR0, RLR1, RLR2, RLR3); + CFI_ENDPROC(); ret; ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;) @@ -425,20 +441,28 @@ ELF(.type _gcry_cast5_amd64_ctr_enc,@function;) _gcry_cast5_amd64_ctr_enc: /* input: * %rdi: ctx, CTX - * %rsi: dst (8 blocks) - * %rdx: src (8 blocks) + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) * %rcx: iv (big endian, 64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); pushq %r14; + CFI_PUSH(%r14); pushq %rsi; + CFI_PUSH(%rsi); pushq %rdx; + CFI_PUSH(%rdx); /* load IV and byteswap */ movq (%rcx), RX0; @@ -458,7 +482,9 @@ _gcry_cast5_amd64_ctr_enc: call __cast5_enc_blk4; popq %r14; /*src*/ + CFI_POP_TMP_REG(); popq %r13; /*dst*/ + CFI_POP_TMP_REG(); /* XOR key-stream with plaintext */ xorq 0 * 8(%r14), RLR0; @@ -471,13 +497,19 @@ _gcry_cast5_amd64_ctr_enc: movq RLR3, 3 * 8(%r13); popq %r14; + CFI_POP(%r14); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret + CFI_ENDPROC(); ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;) .align 8 @@ -486,21 +518,30 @@ ELF(.type _gcry_cast5_amd64_cbc_dec,@function;) _gcry_cast5_amd64_cbc_dec: /* input: * %rdi: ctx, CTX - * %rsi: dst (8 blocks) - * %rdx: src (8 blocks) + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) * %rcx: iv (64bit) */ + CFI_STARTPROC(); 
ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); pushq %r14; + CFI_PUSH(%r14); pushq %rcx; + CFI_PUSH(%rcx); pushq %rsi; + CFI_PUSH(%rsi); pushq %rdx; + CFI_PUSH(%rdx); /* load input */ movq 0 * 8(%rdx), RLR0; @@ -511,8 +552,11 @@ _gcry_cast5_amd64_cbc_dec: call __cast5_dec_blk4; popq RX0; /*src*/ + CFI_POP_TMP_REG(); popq RX1; /*dst*/ + CFI_POP_TMP_REG(); popq RX2; /*iv*/ + CFI_POP_TMP_REG(); movq 3 * 8(RX0), %r14; xorq (RX2), RLR0; @@ -527,14 +571,19 @@ _gcry_cast5_amd64_cbc_dec: movq RLR3, 3 * 8(RX1); popq %r14; + CFI_POP(%r14); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; - + CFI_ENDPROC(); ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;) .align 8 @@ -543,20 +592,28 @@ ELF(.type _gcry_cast5_amd64_cfb_dec,@function;) _gcry_cast5_amd64_cfb_dec: /* input: * %rdi: ctx, CTX - * %rsi: dst (8 blocks) - * %rdx: src (8 blocks) + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) * %rcx: iv (64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); pushq %r14; + CFI_PUSH(%r14); pushq %rsi; + CFI_PUSH(%rsi); pushq %rdx; + CFI_PUSH(%rdx); /* Load input */ movq (%rcx), RLR0; @@ -573,7 +630,9 @@ _gcry_cast5_amd64_cfb_dec: call __cast5_enc_blk4; popq %rdx; /*src*/ + CFI_POP_TMP_REG(); popq %rcx; /*dst*/ + CFI_POP_TMP_REG(); xorq 0 * 8(%rdx), RLR0; xorq 1 * 8(%rdx), RLR1; @@ -585,14 +644,19 @@ _gcry_cast5_amd64_cfb_dec: movq RLR3, 3 * 8(%rcx); popq %r14; + CFI_POP(%r14); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; - + CFI_ENDPROC(); ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;) #endif /*defined(USE_CAST5)*/ diff --git a/cipher/chacha20-amd64-avx2.S b/cipher/chacha20-amd64-avx2.S index 94c8e8cf..de6263b6 100644 --- a/cipher/chacha20-amd64-avx2.S +++ b/cipher/chacha20-amd64-avx2.S @@ -179,11 +179,14 @@ _gcry_chacha20_amd64_avx2_blocks8: * %rdx: src * %rcx: nblks (multiple of 8) */ + CFI_STARTPROC(); vzeroupper; pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); subq $STACK_MAX, %rsp; andq $~31, %rsp; @@ -318,7 +321,9 @@ _gcry_chacha20_amd64_avx2_blocks8: /* eax zeroed by round loop. 
*/ leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_chacha20_amd64_avx2_blocks8, .-_gcry_chacha20_amd64_avx2_blocks8;) @@ -339,9 +344,12 @@ _gcry_chacha20_poly1305_amd64_avx2_blocks8: * %r9: poly1305-state * %r8: poly1305-src */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); vzeroupper; @@ -353,6 +361,11 @@ _gcry_chacha20_poly1305_amd64_avx2_blocks8: movq %r13, (STACK_MAX + 2 * 8)(%rsp); movq %r14, (STACK_MAX + 3 * 8)(%rsp); movq %r15, (STACK_MAX + 4 * 8)(%rsp); + CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8); + CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8); + CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8); + CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8); + CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8); movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST @@ -752,10 +765,17 @@ _gcry_chacha20_poly1305_amd64_avx2_blocks8: movq (STACK_MAX + 2 * 8)(%rsp), %r13; movq (STACK_MAX + 3 * 8)(%rsp), %r14; movq (STACK_MAX + 4 * 8)(%rsp), %r15; + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); xorl %eax, %eax; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_chacha20_poly1305_amd64_avx2_blocks8, .-_gcry_chacha20_poly1305_amd64_avx2_blocks8;) diff --git a/cipher/chacha20-amd64-ssse3.S b/cipher/chacha20-amd64-ssse3.S index 1657f771..6bbf12fc 100644 --- a/cipher/chacha20-amd64-ssse3.S +++ b/cipher/chacha20-amd64-ssse3.S @@ -175,9 +175,12 @@ _gcry_chacha20_amd64_ssse3_blocks4: * %rdx: src * %rcx: nblks (multiple of 4) */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); subq $STACK_MAX, %rsp; andq $~15, %rsp; @@ -329,7 +332,9 @@ _gcry_chacha20_amd64_ssse3_blocks4: /* eax zeroed by round loop. */ leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_chacha20_amd64_ssse3_blocks4, .-_gcry_chacha20_amd64_ssse3_blocks4;) @@ -372,6 +377,7 @@ _gcry_chacha20_amd64_ssse3_blocks1: * %rdx: src * %rcx: nblks */ + CFI_STARTPROC(); /* Load constants */ movdqa .Lcounter1 rRIP, X4; @@ -497,6 +503,7 @@ _gcry_chacha20_amd64_ssse3_blocks1: /* eax zeroed by round loop. 
*/ ret; + CFI_ENDPROC(); ELF(.size _gcry_chacha20_amd64_ssse3_blocks1, .-_gcry_chacha20_amd64_ssse3_blocks1;) @@ -517,9 +524,12 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks4: * %r9: poly1305-state * %r8: poly1305-src */ + CFI_STARTPROC(); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); subq $(8 * 8) + STACK_MAX + 16, %rsp; andq $~15, %rsp; @@ -529,6 +539,11 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks4: movq %r13, (STACK_MAX + 2 * 8)(%rsp); movq %r14, (STACK_MAX + 3 * 8)(%rsp); movq %r15, (STACK_MAX + 4 * 8)(%rsp); + CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8); + CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8); + CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8); + CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8); + CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8); movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST @@ -901,10 +916,17 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks4: movq (STACK_MAX + 2 * 8)(%rsp), %r13; movq (STACK_MAX + 3 * 8)(%rsp), %r14; movq (STACK_MAX + 4 * 8)(%rsp), %r15; + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); xorl %eax, %eax; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4, .-_gcry_chacha20_poly1305_amd64_ssse3_blocks4;) @@ -925,8 +947,12 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks1: * %r9: poly1305-state * %r8: poly1305-src */ + CFI_STARTPROC(); + pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); subq $(8 * 8), %rsp; movq %rbx, (0 * 8)(%rsp); @@ -934,6 +960,11 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks1: movq %r13, (2 * 8)(%rsp); movq %r14, (3 * 8)(%rsp); movq %r15, (4 * 8)(%rsp); + CFI_REG_ON_STACK(rbx, 0 * 8); + CFI_REG_ON_STACK(r12, 1 * 8); + CFI_REG_ON_STACK(r13, 2 * 8); + CFI_REG_ON_STACK(r14, 3 * 8); + CFI_REG_ON_STACK(r15, 4 * 8); movq %rdx, (5 * 8)(%rsp); # SRC movq %rsi, (6 * 8)(%rsp); # DST @@ -1206,10 +1237,17 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks1: movq (2 * 8)(%rsp), %r13; movq (3 * 8)(%rsp), %r14; movq (4 * 8)(%rsp), %r15; + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); xorl %eax, %eax; leave; + CFI_LEAVE(); ret; + CFI_ENDPROC(); ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks1, .-_gcry_chacha20_poly1305_amd64_ssse3_blocks1;) diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S index f25573d9..a211dac3 100644 --- a/cipher/des-amd64.S +++ b/cipher/des-amd64.S @@ -190,15 +190,23 @@ _gcry_3des_amd64_crypt_block: * %rsi: dst * %rdx: src */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); pushq %r14; + CFI_PUSH(%r14); pushq %r15; + CFI_PUSH(%r15); pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); leaq .L_s1 rRIP, SBOXES; @@ -259,18 +267,26 @@ _gcry_3des_amd64_crypt_block: round1(32+15, RL0, RR0, dummy2); popq RW2; /*dst*/ + CFI_POP_TMP_REG(); final_permutation(RR0, RL0); write_block(RW2, RR0, RL0); popq %r15; + CFI_POP(%r15); popq %r14; + CFI_POP(%r14); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;) /*********************************************************************** @@ -465,6 +481,7 @@ _gcry_3des_amd64_crypt_blk3: * RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks * RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks */ + 
CFI_STARTPROC(); leaq .L_s1 rRIP, SBOXES; @@ -528,6 +545,7 @@ _gcry_3des_amd64_crypt_blk3: final_permutation3(RR, RL); ret; + CFI_ENDPROC(); ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;) .align 8 @@ -540,18 +558,28 @@ _gcry_3des_amd64_cbc_dec: * %rdx: src (3 blocks) * %rcx: iv (64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); pushq %r14; + CFI_PUSH(%r14); pushq %r15; + CFI_PUSH(%r15); pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); pushq %rdx; /*src*/ + CFI_PUSH(%rdx); pushq %rcx; /*iv*/ + CFI_PUSH(%rcx); /* load input */ movl 0 * 4(%rdx), RL0d; @@ -571,8 +599,11 @@ _gcry_3des_amd64_cbc_dec: call _gcry_3des_amd64_crypt_blk3; popq %rcx; /*iv*/ + CFI_POP_TMP_REG(); popq %rdx; /*src*/ + CFI_POP_TMP_REG(); popq %rsi; /*dst*/ + CFI_POP_TMP_REG(); bswapl RR0d; bswapl RL0d; @@ -598,14 +629,21 @@ _gcry_3des_amd64_cbc_dec: movl RL2d, 5 * 4(%rsi); popq %r15; + CFI_POP(%r15); popq %r14; + CFI_POP(%r14); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;) .align 8 @@ -618,17 +656,26 @@ _gcry_3des_amd64_ctr_enc: * %rdx: src (3 blocks) * %rcx: iv (64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); pushq %r14; + CFI_PUSH(%r14); pushq %r15; + CFI_PUSH(%r15); pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); pushq %rdx; /*src*/ + CFI_PUSH(%rdx); movq %rcx, RW2; /* load IV and byteswap */ @@ -654,7 +701,9 @@ _gcry_3des_amd64_ctr_enc: call _gcry_3des_amd64_crypt_blk3; popq %rdx; /*src*/ + CFI_POP_TMP_REG(); popq %rsi; /*dst*/ + CFI_POP_TMP_REG(); bswapl RR0d; bswapl RL0d; @@ -678,14 +727,21 @@ _gcry_3des_amd64_ctr_enc: movl RL2d, 5 * 4(%rsi); popq %r15; + CFI_POP(%r15); popq %r14; + CFI_POP(%r14); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;) .align 8 @@ -698,17 +754,26 @@ _gcry_3des_amd64_cfb_dec: * %rdx: src (3 blocks) * %rcx: iv (64bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; + CFI_PUSH(%rbp); pushq %rbx; + CFI_PUSH(%rbx); pushq %r12; + CFI_PUSH(%r12); pushq %r13; + CFI_PUSH(%r13); pushq %r14; + CFI_PUSH(%r14); pushq %r15; + CFI_PUSH(%r15); pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); pushq %rdx; /*src*/ + CFI_PUSH(%rdx); movq %rcx, RW2; /* Load input */ @@ -733,7 +798,9 @@ _gcry_3des_amd64_cfb_dec: call _gcry_3des_amd64_crypt_blk3; popq %rdx; /*src*/ + CFI_POP_TMP_REG(); popq %rsi; /*dst*/ + CFI_POP_TMP_REG(); bswapl RR0d; bswapl RL0d; @@ -757,14 +824,21 @@ _gcry_3des_amd64_cfb_dec: movl RL2d, 5 * 4(%rsi); popq %r15; + CFI_POP(%r15); popq %r14; + CFI_POP(%r14); popq %r13; + CFI_POP(%r13); popq %r12; + CFI_POP(%r12); popq %rbx; + CFI_POP(%rbx); popq %rbp; + CFI_POP(%rbp); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;) .align 16 diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S index 798ff51a..3dcaa856 100644 --- a/cipher/rijndael-amd64.S +++ b/cipher/rijndael-amd64.S @@ -212,14 +212,19 @@ _gcry_aes_amd64_encrypt_block: * %ecx: number of rounds.. 
10, 12 or 14 * %r8: encryption tables */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_5 subq $(5 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(5 * 8); movq %rsi, (0 * 8)(%rsp); movl %ecx, (1 * 8)(%rsp); movq %rbp, (2 * 8)(%rsp); movq %rbx, (3 * 8)(%rsp); movq %r12, (4 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 2 * 8); + CFI_REL_OFFSET(%rbx, 3 * 8); + CFI_REL_OFFSET(%r12, 4 * 8); leaq (%r8), RTAB; @@ -251,16 +256,23 @@ _gcry_aes_amd64_encrypt_block: movl RCd, 2 * 4(%rsi); movl RDd, 3 * 4(%rsi); + CFI_REMEMBER_STATE(); + movq (4 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %rbp; + CFI_RESTORE(%r12); + CFI_RESTORE(%rbx); + CFI_RESTORE(%rbp); addq $(5 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-5 * 8); movl $(6 * 8), %eax; EXIT_SYSV_FUNC ret; + CFI_RESTORE_STATE(); .align 4 .Lenc_not_128: je .Lenc_192 @@ -280,6 +292,7 @@ _gcry_aes_amd64_encrypt_block: lastencround(11); jmp .Lenc_done; + CFI_ENDPROC(); ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;) #define do_decround(next_r) \ @@ -376,14 +389,19 @@ _gcry_aes_amd64_decrypt_block: * %ecx: number of rounds.. 10, 12 or 14 * %r8: decryption tables */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_5 subq $(5 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(5 * 8); movq %rsi, (0 * 8)(%rsp); movl %ecx, (1 * 8)(%rsp); movq %rbp, (2 * 8)(%rsp); movq %rbx, (3 * 8)(%rsp); movq %r12, (4 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 2 * 8); + CFI_REL_OFFSET(%rbx, 3 * 8); + CFI_REL_OFFSET(%r12, 4 * 8); leaq (%r8), RTAB; @@ -416,16 +434,23 @@ _gcry_aes_amd64_decrypt_block: movl RCd, 2 * 4(%rsi); movl RDd, 3 * 4(%rsi); + CFI_REMEMBER_STATE(); + movq (4 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %rbp; + CFI_RESTORE(%r12); + CFI_RESTORE(%rbx); + CFI_RESTORE(%rbp); addq $(5 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-5 * 8); movl $(6 * 8), %eax; EXIT_SYSV_FUNC ret; + CFI_RESTORE_STATE(); .align 4 .Ldec_256: je .Ldec_192; @@ -445,6 +470,7 @@ _gcry_aes_amd64_decrypt_block: decround(9); jmp .Ldec_tail; + CFI_ENDPROC(); ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;) #endif /*USE_AES*/ diff --git a/cipher/rijndael-ssse3-amd64-asm.S b/cipher/rijndael-ssse3-amd64-asm.S index ffce5df2..8124eb21 100644 --- a/cipher/rijndael-ssse3-amd64-asm.S +++ b/cipher/rijndael-ssse3-amd64-asm.S @@ -50,6 +50,7 @@ ELF(.type _gcry_aes_ssse3_enc_preload,@function) .globl _gcry_aes_ssse3_enc_preload _gcry_aes_ssse3_enc_preload: + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 lea .Laes_consts(%rip), %rax movdqa (%rax), %xmm9 # 0F @@ -61,6 +62,7 @@ _gcry_aes_ssse3_enc_preload: movdqa .Lk_sb2+16(%rax), %xmm14 # sb2t EXIT_SYSV_FUNC ret + CFI_ENDPROC(); ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload) ## @@ -69,6 +71,7 @@ ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload) ELF(.type _gcry_aes_ssse3_dec_preload,@function) .globl _gcry_aes_ssse3_dec_preload _gcry_aes_ssse3_dec_preload: + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 lea .Laes_consts(%rip), %rax movdqa (%rax), %xmm9 # 0F @@ -81,6 +84,7 @@ _gcry_aes_ssse3_dec_preload: movdqa .Lk_dsbe (%rax), %xmm8 # sbeu EXIT_SYSV_FUNC ret + CFI_ENDPROC(); ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload) ## @@ -111,6 +115,7 @@ ELF(.type _gcry_aes_ssse3_encrypt_core,@function) .globl _gcry_aes_ssse3_encrypt_core _gcry_aes_ssse3_encrypt_core: _aes_encrypt_core: + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 mov %rdi, %rdx leaq -1(%rsi), %rax @@ -190,6 +195,7 @@ _aes_encrypt_core: pshufb .Lk_sr(%rsi,%rcx), %xmm0 EXIT_SYSV_FUNC ret + CFI_ENDPROC(); ELF(.size 
_aes_encrypt_core,.-_aes_encrypt_core) ## @@ -202,6 +208,7 @@ ELF(.size _aes_encrypt_core,.-_aes_encrypt_core) ELF(.type _gcry_aes_ssse3_decrypt_core,@function) _gcry_aes_ssse3_decrypt_core: _aes_decrypt_core: + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 mov %rdi, %rdx lea .Laes_consts(%rip), %rcx @@ -297,6 +304,7 @@ _aes_decrypt_core: pshufb .Lk_sr(%rsi,%rcx), %xmm0 EXIT_SYSV_FUNC ret + CFI_ENDPROC(); ELF(.size _aes_decrypt_core,.-_aes_decrypt_core) ######################################################## @@ -315,6 +323,7 @@ _aes_schedule_core: # rdx = buffer # rcx = direction. 0=encrypt, 1=decrypt # r8 = rotoffs + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_5 # load the tables @@ -671,6 +680,7 @@ _aes_schedule_core: pxor %xmm8, %xmm8 EXIT_SYSV_FUNC ret + CFI_ENDPROC(); ELF(.size _gcry_aes_ssse3_schedule_core,.-_gcry_aes_ssse3_schedule_core) ######################################################## diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S index 470c32aa..ae8f2715 100644 --- a/cipher/salsa20-amd64.S +++ b/cipher/salsa20-amd64.S @@ -28,11 +28,7 @@ #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20) -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" .text @@ -40,6 +36,7 @@ .globl _gcry_salsa20_amd64_keysetup ELF(.type _gcry_salsa20_amd64_keysetup,@function;) _gcry_salsa20_amd64_keysetup: + CFI_STARTPROC(); movl 0(%rsi),%r8d movl 4(%rsi),%r9d movl 8(%rsi),%eax @@ -87,11 +84,13 @@ _gcry_salsa20_amd64_keysetup: movl %r8d,12(%rdi) .L_keysetupdone: ret + CFI_ENDPROC(); .align 8 .globl _gcry_salsa20_amd64_ivsetup ELF(.type _gcry_salsa20_amd64_ivsetup,@function;) _gcry_salsa20_amd64_ivsetup: + CFI_STARTPROC(); movl 0(%rsi),%r8d movl 4(%rsi),%esi mov $0,%r9 @@ -101,6 +100,7 @@ _gcry_salsa20_amd64_ivsetup: movl %r9d,32(%rdi) movl %eax,52(%rdi) ret + CFI_ENDPROC(); .align 8 .globl _gcry_salsa20_amd64_encrypt_blocks @@ -112,13 +112,15 @@ _gcry_salsa20_amd64_encrypt_blocks: * - Length is input as number of blocks, so don't handle tail bytes * (this is done in salsa20.c). */ + CFI_STARTPROC(); push %rbx + CFI_PUSH(%rbx); shlq $6, %rcx /* blocks to bytes */ mov %r8, %rbx mov %rsp,%r11 - and $31,%r11 - add $384,%r11 - sub %r11,%rsp + CFI_DEF_CFA_REGISTER(%r11); + sub $384,%rsp + and $~31,%rsp mov %rdi,%r8 mov %rsi,%rsi mov %rdx,%rdi @@ -916,15 +918,22 @@ _gcry_salsa20_amd64_encrypt_blocks: cmp $64,%rdx ja .L_bytes_are_128_or_192 .L_done: - add %r11,%rsp + CFI_REMEMBER_STATE(); mov %r11,%rax + sub %rsp,%rax + mov %r11,%rsp + CFI_REGISTER(%r11, %rsp) + CFI_DEF_CFA_REGISTER(%rsp) pop %rbx + CFI_POP(%rbx) ret + CFI_RESTORE_STATE(); .L_bytes_are_128_or_192: sub $64,%rdx add $64,%rdi add $64,%rsi jmp .L_bytes_are_64_128_or_192 + CFI_ENDPROC(); ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;) #endif /*defined(USE_SALSA20)*/ diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S index 8d60a159..9b17c2bd 100644 --- a/cipher/serpent-avx2-amd64.S +++ b/cipher/serpent-avx2-amd64.S @@ -24,17 +24,7 @@ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \ defined(ENABLE_AVX2_SUPPORT) -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif - -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif +#include "asm-common-amd64.h" /* struct serpent_context: */ #define ctx_keys 0 @@ -421,6 +411,7 @@ __serpent_enc_blk16: * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: sixteen parallel * ciphertext blocks */ + CFI_STARTPROC(); vpcmpeqd RNOT, RNOT, RNOT; @@ -496,6 +487,7 @@ __serpent_enc_blk16: transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1); ret; + CFI_ENDPROC(); ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;) .align 8 @@ -509,6 +501,7 @@ __serpent_dec_blk16: * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel * plaintext blocks */ + CFI_STARTPROC(); vpcmpeqd RNOT, RNOT, RNOT; @@ -586,6 +579,7 @@ __serpent_dec_blk16: transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); ret; + CFI_ENDPROC(); ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;) #define inc_le128(x, minus_one, tmp) \ @@ -604,13 +598,14 @@ _gcry_serpent_avx2_ctr_enc: * %rdx: src (16 blocks) * %rcx: iv (big endian, 128bit) */ + CFI_STARTPROC(); movq 8(%rcx), %rax; bswapq %rax; vzeroupper; - vbroadcasti128 .Lbswap128_mask RIP, RTMP3; + vbroadcasti128 .Lbswap128_mask rRIP, RTMP3; vpcmpeqd RNOT, RNOT, RNOT; vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ @@ -701,7 +696,8 @@ _gcry_serpent_avx2_ctr_enc: vzeroall; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;) .align 8 @@ -714,6 +710,7 @@ _gcry_serpent_avx2_cbc_dec: * %rdx: src (16 blocks) * %rcx: iv */ + CFI_STARTPROC(); vzeroupper; @@ -752,7 +749,8 @@ _gcry_serpent_avx2_cbc_dec: vzeroall; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;) .align 8 @@ -765,6 +763,7 @@ _gcry_serpent_avx2_cfb_dec: * %rdx: src (16 blocks) * %rcx: iv */ + CFI_STARTPROC(); vzeroupper; @@ -805,7 +804,8 @@ _gcry_serpent_avx2_cfb_dec: vzeroall; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;) .align 8 @@ -821,15 +821,21 @@ _gcry_serpent_avx2_ocb_enc: * %r8 : checksum * %r9 : L pointers (void *L[16]) */ + CFI_STARTPROC(); vzeroupper; subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); vmovdqu (%rcx), RTMP0x; vmovdqu (%r8), RTMP1x; @@ -882,10 +888,15 @@ _gcry_serpent_avx2_ocb_enc: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __serpent_enc_blk16; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); vpxor (0 * 32)(%rsi), RA4, RA4; vpxor (1 * 32)(%rsi), RA1, RA1; @@ -908,6 +919,7 @@ _gcry_serpent_avx2_ocb_enc: vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;) .align 8 @@ -923,15 +935,21 @@ _gcry_serpent_avx2_ocb_dec: * %r8 : checksum * %r9 : L pointers (void *L[16]) */ + CFI_STARTPROC(); vzeroupper; subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); vmovdqu (%rcx), RTMP0x; @@ -978,10 +996,15 @@ _gcry_serpent_avx2_ocb_dec: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + 
CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __serpent_dec_blk16; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); vmovdqu (%r8), RTMP1x; @@ -1020,6 +1043,7 @@ _gcry_serpent_avx2_ocb_dec: vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;) .align 8 @@ -1034,15 +1058,21 @@ _gcry_serpent_avx2_ocb_auth: * %rcx: checksum * %r8 : L pointers (void *L[16]) */ + CFI_STARTPROC(); vzeroupper; subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); vmovdqu (%rdx), RTMP0x; @@ -1088,10 +1118,15 @@ _gcry_serpent_avx2_ocb_auth: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __serpent_enc_blk16; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); vpxor RA4, RB4, RA4; vpxor RA1, RB1, RA1; @@ -1111,6 +1146,7 @@ _gcry_serpent_avx2_ocb_auth: vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;) .align 16 diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S index b149af24..39cba002 100644 --- a/cipher/serpent-sse2-amd64.S +++ b/cipher/serpent-sse2-amd64.S @@ -23,17 +23,7 @@ #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif - -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif +#include "asm-common-amd64.h" /* struct serpent_context: */ #define ctx_keys 0 @@ -444,6 +434,7 @@ __serpent_enc_blk8: * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel * ciphertext blocks */ + CFI_STARTPROC(); pcmpeqd RNOT, RNOT; @@ -519,6 +510,7 @@ __serpent_enc_blk8: transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1); ret; + CFI_ENDPROC(); ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;) .align 8 @@ -532,6 +524,7 @@ __serpent_dec_blk8: * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext * blocks */ + CFI_STARTPROC(); pcmpeqd RNOT, RNOT; @@ -609,6 +602,7 @@ __serpent_dec_blk8: transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); ret; + CFI_ENDPROC(); ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;) .align 8 @@ -621,6 +615,7 @@ _gcry_serpent_sse2_ctr_enc: * %rdx: src (8 blocks) * %rcx: iv (big endian, 128bit) */ + CFI_STARTPROC(); /* load IV and byteswap */ movdqu (%rcx), RA0; @@ -738,7 +733,8 @@ _gcry_serpent_sse2_ctr_enc: pxor RTMP2, RTMP2; pxor RNOT, RNOT; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;) .align 8 @@ -751,6 +747,7 @@ _gcry_serpent_sse2_cbc_dec: * %rdx: src (8 blocks) * %rcx: iv */ + CFI_STARTPROC(); movdqu (0 * 16)(%rdx), RA0; movdqu (1 * 16)(%rdx), RA1; @@ -799,7 +796,8 @@ _gcry_serpent_sse2_cbc_dec: pxor RTMP2, RTMP2; pxor RNOT, RNOT; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;) .align 8 @@ -812,6 +810,7 @@ _gcry_serpent_sse2_cfb_dec: * %rdx: src (8 blocks) * %rcx: iv */ + CFI_STARTPROC(); /* Load input */ movdqu (%rcx), RA0; @@ -863,7 +862,8 @@ _gcry_serpent_sse2_cfb_dec: pxor RTMP2, RTMP2; pxor RNOT, RNOT; - ret + ret; + CFI_ENDPROC(); ELF(.size 
_gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;) .align 8 @@ -879,13 +879,19 @@ _gcry_serpent_sse2_ocb_enc: * %r8 : checksum * %r9 : L pointers (void *L[8]) */ + CFI_STARTPROC(); subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); movdqu (%rcx), RTMP0; movdqu (%r8), RTMP1; @@ -926,10 +932,15 @@ _gcry_serpent_sse2_ocb_enc: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __serpent_enc_blk8; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); pxor_u((0 * 16)(%rsi), RA4, RTMP0); pxor_u((1 * 16)(%rsi), RA1, RTMP0); @@ -966,6 +977,7 @@ _gcry_serpent_sse2_ocb_enc: pxor RNOT, RNOT; ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;) .align 8 @@ -981,13 +993,19 @@ _gcry_serpent_sse2_ocb_dec: * %r8 : checksum * %r9 : L pointers (void *L[8]) */ + CFI_STARTPROC(); subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); movdqu (%rcx), RTMP0; @@ -1024,10 +1042,15 @@ _gcry_serpent_sse2_ocb_dec: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __serpent_dec_blk8; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); movdqu (%r8), RTMP0; @@ -1078,6 +1101,7 @@ _gcry_serpent_sse2_ocb_dec: pxor RNOT, RNOT; ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;) .align 8 @@ -1092,13 +1116,19 @@ _gcry_serpent_sse2_ocb_auth: * %rcx: checksum * %r8 : L pointers (void *L[8]) */ + CFI_STARTPROC(); subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); movdqu (%rdx), RTMP0; @@ -1134,10 +1164,15 @@ _gcry_serpent_sse2_ocb_auth: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __serpent_enc_blk8; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); movdqu (%rcx), RTMP0; pxor RB4, RA4; @@ -1169,6 +1204,7 @@ _gcry_serpent_sse2_ocb_auth: pxor RNOT, RNOT; ret; + CFI_ENDPROC(); ELF(.size _gcry_serpent_sse2_ocb_auth,.-_gcry_serpent_sse2_ocb_auth;) #endif /*defined(USE_SERPENT)*/ diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S index 5d674c15..85876ad4 100644 --- a/cipher/sha1-avx-amd64.S +++ b/cipher/sha1-avx-amd64.S @@ -33,18 +33,7 @@ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" /* Context structure */ @@ -161,7 +150,7 @@ vpshufb BSWAP_REG, tmp0, W; #define W_PRECALC_00_15_2(i, W, tmp0) \ - vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; + vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; #define W_PRECALC_00_15_3(i, W, tmp0) \ vmovdqa tmp0, WK(i&~3); @@ -186,7 +175,7 @@ #define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \ vpxor W, tmp0, tmp0; \ vpxor tmp1, tmp0, W; \ - vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \ + vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \ vmovdqa tmp0, WK((i)&~3); #define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ @@ -203,7 +192,7 @@ #define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ vpor W, tmp0, W; \ - vpaddd (.LK_XMM + ((i)/20)*16) RIP, W, tmp0; \ + vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \ vmovdqa tmp0, WK((i)&~3); @@ -223,6 +212,7 @@ _gcry_sha1_transform_amd64_avx: * %rsi: data (64*nblks bytes) * %rdx: nblks */ + CFI_STARTPROC(); xorl %eax, %eax; cmpq $0, %rdx; @@ -234,9 +224,12 @@ _gcry_sha1_transform_amd64_avx: movq %rdi, RSTATE; movq %rsi, RDATA; pushq %rbx; + CFI_PUSH(%rbx); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, ROLDSTACK; + CFI_DEF_CFA_REGISTER(ROLDSTACK); subq $(16*4), %rsp; andq $(~31), %rsp; @@ -248,7 +241,7 @@ _gcry_sha1_transform_amd64_avx: movl state_h3(RSTATE), d; movl state_h4(RSTATE), e; - vmovdqa .Lbswap_shufb_ctl RIP, BSWAP_REG; + vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG; /* Precalc 0-15. */ W_PRECALC_00_15_0(0, W0, Wtmp0); @@ -415,15 +408,20 @@ _gcry_sha1_transform_amd64_avx: movl e, state_h4(RSTATE); movq ROLDSTACK, %rsp; + CFI_REGISTER(ROLDSTACK, %rsp); + CFI_DEF_CFA_REGISTER(%rsp); popq %rbp; + CFI_POP(%rbp); popq %rbx; + CFI_POP(%rbx); /* stack already burned */ xorl %eax, %eax; .Lret: ret; + CFI_ENDPROC(); ELF(.size _gcry_sha1_transform_amd64_avx, .-_gcry_sha1_transform_amd64_avx;) diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S index fe8901ef..5dfcdca9 100644 --- a/cipher/sha1-avx-bmi2-amd64.S +++ b/cipher/sha1-avx-bmi2-amd64.S @@ -34,18 +34,7 @@ defined(HAVE_GCC_INLINE_ASM_BMI2) && \ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" /* Context structure */ @@ -222,6 +211,7 @@ _gcry_sha1_transform_amd64_avx_bmi2: * %rsi: data (64*nblks bytes) * %rdx: nblks */ + CFI_STARTPROC(); xorl %eax, %eax; cmpq $0, %rdx; @@ -233,10 +223,14 @@ _gcry_sha1_transform_amd64_avx_bmi2: movq %rdi, RSTATE; movq %rsi, RDATA; pushq %rbx; + CFI_PUSH(%rbx); pushq %rbp; + CFI_PUSH(%rbp); pushq %r12; + CFI_PUSH(%r12); movq %rsp, ROLDSTACK; + CFI_DEF_CFA_REGISTER(ROLDSTACK); subq $(16*4), %rsp; andq $(~31), %rsp; @@ -249,11 +243,11 @@ _gcry_sha1_transform_amd64_avx_bmi2: movl state_h4(RSTATE), e; xorl ne, ne; - vmovdqa .Lbswap_shufb_ctl RIP, BSWAP_REG; - vpbroadcastd .LK1 RIP, K1; - vpbroadcastd .LK2 RIP, K2; - vpbroadcastd .LK3 RIP, K3; - vpbroadcastd .LK4 RIP, K4; + vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG; + vpbroadcastd .LK1 rRIP, K1; + vpbroadcastd .LK2 rRIP, K2; + vpbroadcastd .LK3 rRIP, K3; + vpbroadcastd .LK4 rRIP, K4; /* Precalc 0-15. 
*/ W_PRECALC_00_15_0(0, W0, Wtmp0); @@ -424,16 +418,22 @@ _gcry_sha1_transform_amd64_avx_bmi2: movl e, state_h4(RSTATE); movq ROLDSTACK, %rsp; + CFI_REGISTER(ROLDSTACK, %rsp); + CFI_DEF_CFA_REGISTER(%rsp); popq %r12; + CFI_POP(%r12); popq %rbp; + CFI_POP(%rbp); popq %rbx; + CFI_POP(%rbx); /* stack already burned */ xorl %eax, %eax; .Lret: ret; + CFI_ENDPROC(); ELF(.size _gcry_sha1_transform_amd64_avx_bmi2, .-_gcry_sha1_transform_amd64_avx_bmi2;) diff --git a/cipher/sha1-avx2-bmi2-amd64.S b/cipher/sha1-avx2-bmi2-amd64.S index 2a2f21a5..93863230 100644 --- a/cipher/sha1-avx2-bmi2-amd64.S +++ b/cipher/sha1-avx2-bmi2-amd64.S @@ -34,18 +34,7 @@ defined(HAVE_GCC_INLINE_ASM_BMI2) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" /* Context structure */ @@ -228,6 +217,7 @@ _gcry_sha1_transform_amd64_avx2_bmi2: * %rsi: data (64*nblks bytes) * %rdx: nblks (multiple of 2, larger than 0) */ + CFI_STARTPROC(); vzeroupper; @@ -235,10 +225,14 @@ _gcry_sha1_transform_amd64_avx2_bmi2: movq %rdi, RSTATE; movq %rsi, RDATA; pushq %rbx; + CFI_PUSH(%rbx); pushq %rbp; + CFI_PUSH(%rbp); pushq %r12; + CFI_PUSH(%r12); movq %rsp, ROLDSTACK; + CFI_DEF_CFA_REGISTER(ROLDSTACK); subq $(WK_STACK_WORDS*4), %rsp; andq $(~63), %rsp; @@ -251,11 +245,11 @@ _gcry_sha1_transform_amd64_avx2_bmi2: movl state_h4(RSTATE), e; xorl ne, ne; - vbroadcasti128 .Lbswap_shufb_ctl RIP, BSWAP_REG; - vpbroadcastd .LK1 RIP, K1; - vpbroadcastd .LK2 RIP, K2; - vpbroadcastd .LK3 RIP, K3; - vpbroadcastd .LK4 RIP, K4; + vbroadcasti128 .Lbswap_shufb_ctl rRIP, BSWAP_REG; + vpbroadcastd .LK1 rRIP, K1; + vpbroadcastd .LK2 rRIP, K2; + vpbroadcastd .LK3 rRIP, K3; + vpbroadcastd .LK4 rRIP, K4; /* Precalc 0-31 for block 1 & 2. */ W_PRECALC_00_15_0(0, W0, Wtmp0); @@ -557,15 +551,21 @@ _gcry_sha1_transform_amd64_avx2_bmi2: movl e, state_h4(RSTATE); movq ROLDSTACK, %rsp; + CFI_REGISTER(ROLDSTACK, %rsp); + CFI_DEF_CFA_REGISTER(%rsp); popq %r12; + CFI_POP(%r12); popq %rbp; + CFI_POP(%rbp); popq %rbx; + CFI_POP(%rbx); /* stack already burned */ xorl %eax, %eax; ret; + CFI_ENDPROC(); ELF(.size _gcry_sha1_transform_amd64_avx2_bmi2, .-_gcry_sha1_transform_amd64_avx2_bmi2;) diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S index fff14034..7e32b0f4 100644 --- a/cipher/sha1-ssse3-amd64.S +++ b/cipher/sha1-ssse3-amd64.S @@ -33,18 +33,7 @@ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" /* Context structure */ @@ -162,7 +151,7 @@ movdqa tmp0, W; #define W_PRECALC_00_15_2(i, W, tmp0) \ - paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; + paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; #define W_PRECALC_00_15_3(i, W, tmp0) \ movdqa tmp0, WK(i&~3); @@ -193,7 +182,7 @@ pxor W, tmp0; \ pxor tmp1, tmp0; \ movdqa tmp0, W; \ - paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \ + paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \ movdqa tmp0, WK((i)&~3); #define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ @@ -213,7 +202,7 @@ #define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \ movdqa tmp0, W; \ - paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \ + paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \ movdqa tmp0, WK((i)&~3); #define CLEAR_REG(reg) pxor reg, reg; @@ -235,6 +224,7 @@ _gcry_sha1_transform_amd64_ssse3: * %rsi: data (64*nblks bytes) * %rdx: nblks */ + CFI_STARTPROC(); xorl %eax, %eax; cmpq $0, %rdx; @@ -244,9 +234,12 @@ _gcry_sha1_transform_amd64_ssse3: movq %rdi, RSTATE; movq %rsi, RDATA; pushq %rbx; + CFI_PUSH(%rbx); pushq %rbp; + CFI_PUSH(%rbp); movq %rsp, ROLDSTACK; + CFI_DEF_CFA_REGISTER(ROLDSTACK); subq $(16*4), %rsp; andq $(~31), %rsp; @@ -258,7 +251,7 @@ _gcry_sha1_transform_amd64_ssse3: movl state_h3(RSTATE), d; movl state_h4(RSTATE), e; - movdqa .Lbswap_shufb_ctl RIP, BSWAP_REG; + movdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG; /* Precalc 0-15. */ W_PRECALC_00_15_0(0, W0, Wtmp0); @@ -423,15 +416,20 @@ _gcry_sha1_transform_amd64_ssse3: movl e, state_h4(RSTATE); movq ROLDSTACK, %rsp; + CFI_REGISTER(ROLDSTACK, %rsp); + CFI_DEF_CFA_REGISTER(%rsp); popq %rbp; + CFI_POP(%rbp); popq %rbx; + CFI_POP(%rbx); /* stack already burned */ xorl %eax, %eax; .Lret: ret; + CFI_ENDPROC(); ELF(.size _gcry_sha1_transform_amd64_ssse3, .-_gcry_sha1_transform_amd64_ssse3;) diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S index b8b01b15..77143ff0 100644 --- a/cipher/sha256-avx-amd64.S +++ b/cipher/sha256-avx-amd64.S @@ -59,17 +59,7 @@ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA256) -#ifdef __PIC__ -# define ADD_RIP +rip -#else -# define ADD_RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" .intel_syntax noprefix @@ -380,15 +370,22 @@ rotate_Xs ELF(.type _gcry_sha256_transform_amd64_avx,@function;) .align 16 _gcry_sha256_transform_amd64_avx: + CFI_STARTPROC() vzeroupper push rbx + CFI_PUSH(rbx) push rbp + CFI_PUSH(rbp) push r13 + CFI_PUSH(r13) push r14 + CFI_PUSH(r14) push r15 + CFI_PUSH(r15) sub rsp, STACK_SIZE + CFI_ADJUST_CFA_OFFSET(STACK_SIZE); shl NUM_BLKS, 6 /* convert to bytes */ jz .Ldone_hash @@ -487,14 +484,21 @@ _gcry_sha256_transform_amd64_avx: xor eax, eax add rsp, STACK_SIZE + CFI_ADJUST_CFA_OFFSET(-STACK_SIZE); pop r15 + CFI_POP(r15) pop r14 + CFI_POP(r14) pop r13 + CFI_POP(r13) pop rbp + CFI_POP(rbp) pop rbx + CFI_POP(rbx) ret + CFI_ENDPROC() .align 16 diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S index 5fc402cd..52be1a07 100644 --- a/cipher/sha256-avx2-bmi2-amd64.S +++ b/cipher/sha256-avx2-bmi2-amd64.S @@ -60,17 +60,7 @@ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \ defined(USE_SHA256) -#ifdef __PIC__ -# define ADD_RIP +rip -#else -# define ADD_RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" .intel_syntax noprefix @@ -314,17 +304,24 @@ a = TMP_ ELF(.type _gcry_sha256_transform_amd64_avx2,@function) .align 32 _gcry_sha256_transform_amd64_avx2: + CFI_STARTPROC() xor eax, eax cmp rdx, 0 je .Lnowork push rbx + CFI_PUSH(rbx) push rbp + CFI_PUSH(rbp) push r12 + CFI_PUSH(r12) push r13 + CFI_PUSH(r13) push r14 + CFI_PUSH(r14) push r15 + CFI_PUSH(r15) vzeroupper @@ -333,9 +330,11 @@ _gcry_sha256_transform_amd64_avx2: vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP] mov rax, rsp + CFI_DEF_CFA_REGISTER(rax); sub rsp, STACK_SIZE and rsp, ~63 mov [rsp + _RSP], rax + CFI_CFA_ON_STACK(_RSP, 6 * 8) shl NUM_BLKS, 6 /* convert to bytes */ lea NUM_BLKS, [NUM_BLKS + INP - 64] /* pointer to last block */ @@ -507,16 +506,24 @@ _gcry_sha256_transform_amd64_avx2: xor eax, eax mov rsp, [rsp + _RSP] + CFI_DEF_CFA_REGISTER(rsp) pop r15 + CFI_POP(r15) pop r14 + CFI_POP(r14) pop r13 + CFI_POP(r13) pop r12 + CFI_POP(r12) pop rbp + CFI_POP(rbp) pop rbx + CFI_POP(rbx) .Lnowork: ret + CFI_ENDPROC() .align 64 .LK256: diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S index ca5c9fd1..0fb94c1b 100644 --- a/cipher/sha256-ssse3-amd64.S +++ b/cipher/sha256-ssse3-amd64.S @@ -60,17 +60,7 @@ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA256) -#ifdef __PIC__ -# define ADD_RIP +rip -#else -# define ADD_RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" .intel_syntax noprefix @@ -386,13 +376,20 @@ rotate_Xs ELF(.type _gcry_sha256_transform_amd64_ssse3,@function;) .align 16 _gcry_sha256_transform_amd64_ssse3: + CFI_STARTPROC() push rbx + CFI_PUSH(rbx) push rbp + CFI_PUSH(rbp) push r13 + CFI_PUSH(r13) push r14 + CFI_PUSH(r14) push r15 + CFI_PUSH(r15) sub rsp, STACK_SIZE + CFI_ADJUST_CFA_OFFSET(STACK_SIZE); shl NUM_BLKS, 6 /* convert to bytes */ jz .Ldone_hash @@ -508,14 +505,21 @@ _gcry_sha256_transform_amd64_ssse3: xor eax, eax add rsp, STACK_SIZE + CFI_ADJUST_CFA_OFFSET(-STACK_SIZE); pop r15 + CFI_POP(r15) pop r14 + CFI_POP(r14) pop r13 + CFI_POP(r13) pop rbp + CFI_POP(rbp) pop rbx + CFI_POP(rbx) ret + CFI_ENDPROC() .align 16 diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S index 534351e4..991fd639 100644 --- a/cipher/sha512-avx-amd64.S +++ b/cipher/sha512-avx-amd64.S @@ -46,17 +46,7 @@ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA512) -#ifdef __PIC__ -# define ADD_RIP +rip -#else -# define ADD_RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" .intel_syntax noprefix @@ -269,6 +259,7 @@ frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size)) ELF(.type _gcry_sha512_transform_amd64_avx,@function;) .align 16 _gcry_sha512_transform_amd64_avx: + CFI_STARTPROC() xor eax, eax cmp msglen, 0 @@ -278,6 +269,7 @@ _gcry_sha512_transform_amd64_avx: /* Allocate Stack Space */ sub rsp, frame_size + CFI_ADJUST_CFA_OFFSET(frame_size); /* Save GPRs */ mov [rsp + frame_GPRSAVE + 8 * 0], rbx @@ -285,6 +277,11 @@ _gcry_sha512_transform_amd64_avx: mov [rsp + frame_GPRSAVE + 8 * 2], r13 mov [rsp + frame_GPRSAVE + 8 * 3], r14 mov [rsp + frame_GPRSAVE + 8 * 4], r15 + CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0); + CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1); + CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2); + CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3); + CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4); .Lupdateblock: @@ -351,6 +348,11 @@ _gcry_sha512_transform_amd64_avx: mov r13, [rsp + frame_GPRSAVE + 8 * 2] mov r14, [rsp + frame_GPRSAVE + 8 * 3] mov r15, [rsp + frame_GPRSAVE + 8 * 4] + CFI_RESTORE(rbx) + CFI_RESTORE(r12) + CFI_RESTORE(r13) + CFI_RESTORE(r14) + CFI_RESTORE(r15) vzeroall @@ -365,9 +367,11 @@ _gcry_sha512_transform_amd64_avx: /* Restore Stack Pointer */ add rsp, frame_size + CFI_ADJUST_CFA_OFFSET(-frame_size); .Lnowork: ret + CFI_ENDPROC() /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S index 32cfceb0..3b28ab6c 100644 --- a/cipher/sha512-avx2-bmi2-amd64.S +++ b/cipher/sha512-avx2-bmi2-amd64.S @@ -49,17 +49,7 @@ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \ defined(USE_SHA512) -#ifdef __PIC__ -# define ADD_RIP +rip -#else -# define ADD_RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" .intel_syntax noprefix @@ -352,6 +342,7 @@ y4 = r12 ELF(.type _gcry_sha512_transform_amd64_avx2,@function;) .align 16 _gcry_sha512_transform_amd64_avx2: + CFI_STARTPROC() xor eax, eax cmp rdx, 0 @@ -361,9 +352,11 @@ _gcry_sha512_transform_amd64_avx2: /* Allocate Stack Space */ mov rax, rsp + CFI_DEF_CFA_REGISTER(rax); sub rsp, frame_size and rsp, ~(0x40 - 1) mov [rsp + frame_RSPSAVE], rax + CFI_CFA_ON_STACK(frame_RSPSAVE, 0) /* Save GPRs */ mov [rsp + frame_GPRSAVE + 8 * 0], rbp @@ -372,6 +365,12 @@ _gcry_sha512_transform_amd64_avx2: mov [rsp + frame_GPRSAVE + 8 * 3], r13 mov [rsp + frame_GPRSAVE + 8 * 4], r14 mov [rsp + frame_GPRSAVE + 8 * 5], r15 + CFI_REG_ON_STACK(rbp, frame_GPRSAVE + 8 * 0) + CFI_REG_ON_STACK(rbx, frame_GPRSAVE + 8 * 1) + CFI_REG_ON_STACK(r12, frame_GPRSAVE + 8 * 2) + CFI_REG_ON_STACK(r13, frame_GPRSAVE + 8 * 3) + CFI_REG_ON_STACK(r14, frame_GPRSAVE + 8 * 4) + CFI_REG_ON_STACK(r15, frame_GPRSAVE + 8 * 5) mov [rsp + frame_NBLKS], NUM_BLKS @@ -494,11 +493,20 @@ _gcry_sha512_transform_amd64_avx2: mov r13, [rsp + frame_GPRSAVE + 8 * 3] mov r14, [rsp + frame_GPRSAVE + 8 * 4] mov r15, [rsp + frame_GPRSAVE + 8 * 5] + CFI_RESTORE(rbp) + CFI_RESTORE(rbx) + CFI_RESTORE(r12) + CFI_RESTORE(r13) + CFI_RESTORE(r14) + CFI_RESTORE(r15) /* Restore Stack Pointer */ mov rsp, [rsp + frame_RSPSAVE] + CFI_DEF_CFA_REGISTER(rsp) + .Lnowork: ret + CFI_ENDPROC() /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */ /*;; Binary Data */ diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S index 8e950e0e..39bfe362 100644 --- a/cipher/sha512-ssse3-amd64.S +++ b/cipher/sha512-ssse3-amd64.S @@ -49,17 +49,7 @@ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512) -#ifdef __PIC__ -# define ADD_RIP +rip -#else -# define ADD_RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" .intel_syntax noprefix @@ -271,6 +261,7 @@ frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size)) ELF(.type _gcry_sha512_transform_amd64_ssse3,@function;) .align 16 _gcry_sha512_transform_amd64_ssse3: + CFI_STARTPROC() xor eax, eax cmp msglen, 0 @@ -278,6 +269,7 @@ _gcry_sha512_transform_amd64_ssse3: /* Allocate Stack Space */ sub rsp, frame_size + CFI_ADJUST_CFA_OFFSET(frame_size); /* Save GPRs */ mov [rsp + frame_GPRSAVE + 8 * 0], rbx @@ -285,6 +277,11 @@ _gcry_sha512_transform_amd64_ssse3: mov [rsp + frame_GPRSAVE + 8 * 2], r13 mov [rsp + frame_GPRSAVE + 8 * 3], r14 mov [rsp + frame_GPRSAVE + 8 * 4], r15 + CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0); + CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1); + CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2); + CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3); + CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4); .Lupdateblock: @@ -351,6 +348,11 @@ _gcry_sha512_transform_amd64_ssse3: mov r13, [rsp + frame_GPRSAVE + 8 * 2] mov r14, [rsp + frame_GPRSAVE + 8 * 3] mov r15, [rsp + frame_GPRSAVE + 8 * 4] + CFI_RESTORE(rbx) + CFI_RESTORE(r12) + CFI_RESTORE(r13) + CFI_RESTORE(r14) + CFI_RESTORE(r15) pxor xmm0, xmm0 pxor xmm1, xmm1 @@ -370,9 +372,11 @@ _gcry_sha512_transform_amd64_ssse3: /* Restore Stack Pointer */ add rsp, frame_size + CFI_ADJUST_CFA_OFFSET(-frame_size); .Lnowork: ret + CFI_ENDPROC() /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S index 134d6401..3cb73431 100644 --- a/cipher/twofish-amd64.S +++ b/cipher/twofish-amd64.S @@ -171,12 +171,16 @@ _gcry_twofish_amd64_encrypt_block: * %rsi: dst * %rdx: src */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 subq $(3 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(3 * 8); movq %rsi, (0 * 8)(%rsp); movq %rbp, (1 * 8)(%rsp); movq %rbx, (2 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 1 * 8); + CFI_REL_OFFSET(%rbx, 2 * 8); movq %rdx, RX; inpack(RX, 0, RAd, 0); @@ -201,10 +205,14 @@ _gcry_twofish_amd64_encrypt_block: movq (2 * 8)(%rsp), %rbx; movq (1 * 8)(%rsp), %rbp; + CFI_RESTORE(%rbx); + CFI_RESTORE(%rbp); addq $(3 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-3 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;) .align 8 @@ -217,12 +225,16 @@ _gcry_twofish_amd64_decrypt_block: * %rsi: dst * %rdx: src */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 subq $(3 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(3 * 8); movq %rsi, (0 * 8)(%rsp); movq %rbp, (1 * 8)(%rsp); movq %rbx, (2 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 1 * 8); + CFI_REL_OFFSET(%rbx, 2 * 8); movq %rdx, RX; inpack(RX, 0, RCd, 4); @@ -247,10 +259,14 @@ _gcry_twofish_amd64_decrypt_block: movq (2 * 8)(%rsp), %rbx; movq (1 * 8)(%rsp), %rbp; + CFI_RESTORE(%rbx); + CFI_RESTORE(%rbp); addq $(3 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-3 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;) #undef CTX @@ -480,6 +496,8 @@ __twofish_enc_blk3: * output: * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three ciphertext blocks */ + CFI_STARTPROC(); + inpack_enc3(); encrypt_cycle3(RAB, RCD, 0); @@ -494,6 +512,7 @@ __twofish_enc_blk3: outunpack_enc3(); ret; + CFI_ENDPROC(); ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;) .align 8 @@ -506,6 +525,8 @@ __twofish_dec_blk3: * output: * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three plaintext blocks */ + CFI_STARTPROC(); + inpack_dec3(); decrypt_cycle3(RAB, RCD, 7); @@ -520,6 +541,7 @@ __twofish_dec_blk3: outunpack_dec3(); ret; + 
CFI_ENDPROC(); ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;) .align 8 @@ -532,15 +554,23 @@ _gcry_twofish_amd64_ctr_enc: * %rdx: src (3 blocks) * %rcx: iv (big endian, 128bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 subq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(8 * 8); movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 0 * 8); + CFI_REL_OFFSET(%rbx, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); + CFI_REL_OFFSET(%r14, 4 * 8); + CFI_REL_OFFSET(%r15, 5 * 8); movq %rsi, (6 * 8)(%rsp); movq %rdx, (7 * 8)(%rsp); @@ -601,10 +631,18 @@ _gcry_twofish_amd64_ctr_enc: movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); addq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-8 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;) .align 8 @@ -617,15 +655,23 @@ _gcry_twofish_amd64_cbc_dec: * %rdx: src (3 blocks) * %rcx: iv (128bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 subq $(9 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(9 * 8); movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 0 * 8); + CFI_REL_OFFSET(%rbx, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); + CFI_REL_OFFSET(%r14, 4 * 8); + CFI_REL_OFFSET(%r15, 5 * 8); movq %rsi, (6 * 8)(%rsp); movq %rdx, (7 * 8)(%rsp); @@ -670,10 +716,18 @@ _gcry_twofish_amd64_cbc_dec: movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); addq $(9 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-9 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;) .align 8 @@ -686,15 +740,23 @@ _gcry_twofish_amd64_cfb_dec: * %rdx: src (3 blocks) * %rcx: iv (128bit) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_0_4 subq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(8 * 8); movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 0 * 8); + CFI_REL_OFFSET(%rbx, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); + CFI_REL_OFFSET(%r14, 4 * 8); + CFI_REL_OFFSET(%r15, 5 * 8); movq %rsi, (6 * 8)(%rsp); movq %rdx, (7 * 8)(%rsp); @@ -739,10 +801,18 @@ _gcry_twofish_amd64_cfb_dec: movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); addq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-8 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;) .align 8 @@ -757,15 +827,23 @@ _gcry_twofish_amd64_ocb_enc: * %r8 : checksum * %r9 : L pointers (void *L[3]) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_6 subq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(8 * 8); movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 0 * 8); + CFI_REL_OFFSET(%rbx, 1 * 8); + 
CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); + CFI_REL_OFFSET(%r14, 4 * 8); + CFI_REL_OFFSET(%r15, 5 * 8); movq %rsi, (6 * 8)(%rsp); movq %rdx, RX0; @@ -849,10 +927,18 @@ _gcry_twofish_amd64_ocb_enc: movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); addq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-8 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;) .align 8 @@ -867,15 +953,23 @@ _gcry_twofish_amd64_ocb_dec: * %r8 : checksum * %r9 : L pointers (void *L[3]) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_6 subq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(8 * 8); movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 0 * 8); + CFI_REL_OFFSET(%rbx, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); + CFI_REL_OFFSET(%r14, 4 * 8); + CFI_REL_OFFSET(%r15, 5 * 8); movq %rsi, (6 * 8)(%rsp); movq %r8, (7 * 8)(%rsp); @@ -967,10 +1061,18 @@ _gcry_twofish_amd64_ocb_dec: movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); addq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-8 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;) .align 8 @@ -984,15 +1086,23 @@ _gcry_twofish_amd64_ocb_auth: * %rcx: checksum * %r8 : L pointers (void *L[3]) */ + CFI_STARTPROC(); ENTER_SYSV_FUNC_PARAMS_5 subq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(8 * 8); movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); + CFI_REL_OFFSET(%rbp, 0 * 8); + CFI_REL_OFFSET(%rbx, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); + CFI_REL_OFFSET(%r14, 4 * 8); + CFI_REL_OFFSET(%r15, 5 * 8); movq %rcx, (6 * 8)(%rsp); movq %rsi, RX0; @@ -1056,10 +1166,18 @@ _gcry_twofish_amd64_ocb_auth: movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); addq $(8 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-8 * 8); EXIT_SYSV_FUNC ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;) #endif /*USE_TWOFISH*/ diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S index db6e2182..74cad355 100644 --- a/cipher/twofish-avx2-amd64.S +++ b/cipher/twofish-avx2-amd64.S @@ -24,17 +24,7 @@ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH) && \ defined(ENABLE_AVX2_SUPPORT) -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif - -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif +#include "asm-common-amd64.h" .text @@ -423,6 +413,7 @@ __twofish_enc_blk16: * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel * ciphertext blocks */ + CFI_STARTPROC(); init_round_constants(); transpose4x4_16(RA, RB, RC, RD); @@ -441,6 +432,7 @@ __twofish_enc_blk16: transpose4x4_16(RA, RB, RC, RD); ret; + CFI_ENDPROC(); ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;) .align 8 @@ -454,6 +446,7 @@ __twofish_dec_blk16: * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel * ciphertext blocks */ + CFI_STARTPROC(); init_round_constants(); transpose4x4_16(RA, RB, RC, RD); @@ -472,6 +465,7 @@ __twofish_dec_blk16: transpose4x4_16(RA, RB, RC, RD); ret; + CFI_ENDPROC(); ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;) #define inc_le128(x, minus_one, tmp) \ @@ -490,13 +484,14 @@ _gcry_twofish_avx2_ctr_enc: * %rdx: src (16 blocks) * %rcx: iv (big endian, 128bit) */ + CFI_STARTPROC(); movq 8(%rcx), %rax; bswapq %rax; vzeroupper; - vbroadcasti128 .Lbswap128_mask RIP, RTMP3; + vbroadcasti128 .Lbswap128_mask rRIP, RTMP3; vpcmpeqd RNOT, RNOT, RNOT; vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */ @@ -587,7 +582,8 @@ _gcry_twofish_avx2_ctr_enc: vzeroall; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;) .align 8 @@ -600,6 +596,7 @@ _gcry_twofish_avx2_cbc_dec: * %rdx: src (16 blocks) * %rcx: iv */ + CFI_STARTPROC(); vzeroupper; @@ -638,7 +635,8 @@ _gcry_twofish_avx2_cbc_dec: vzeroall; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;) .align 8 @@ -651,6 +649,7 @@ _gcry_twofish_avx2_cfb_dec: * %rdx: src (16 blocks) * %rcx: iv */ + CFI_STARTPROC(); vzeroupper; @@ -691,7 +690,8 @@ _gcry_twofish_avx2_cfb_dec: vzeroall; - ret + ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;) .align 8 @@ -707,15 +707,21 @@ _gcry_twofish_avx2_ocb_enc: * %r8 : checksum * %r9 : L pointers (void *L[16]) */ + CFI_STARTPROC(); vzeroupper; subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); vmovdqu (%rcx), RTMP0x; vmovdqu (%r8), RTMP1x; @@ -768,10 +774,15 @@ _gcry_twofish_avx2_ocb_enc: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __twofish_enc_blk16; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); vpxor (0 * 32)(%rsi), RA0, RA0; vpxor (1 * 32)(%rsi), RB0, RB0; @@ -794,6 +805,7 @@ _gcry_twofish_avx2_ocb_enc: vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;) .align 8 @@ -809,15 +821,21 @@ _gcry_twofish_avx2_ocb_dec: * %r8 : checksum * %r9 : L pointers (void *L[16]) */ + CFI_STARTPROC(); vzeroupper; subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); vmovdqu (%rcx), RTMP0x; @@ -865,6 +883,10 @@ _gcry_twofish_avx2_ocb_dec: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + 
CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __twofish_dec_blk16; @@ -880,6 +902,7 @@ _gcry_twofish_avx2_ocb_dec: vpxor (7 * 32)(%rsi), RD1, RD1; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); /* Checksum_i = Checksum_{i-1} xor P_i */ @@ -907,6 +930,7 @@ _gcry_twofish_avx2_ocb_dec: vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;) .align 8 @@ -921,15 +945,21 @@ _gcry_twofish_avx2_ocb_auth: * %rcx: checksum * %r8 : L pointers (void *L[16]) */ + CFI_STARTPROC(); vzeroupper; subq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(4 * 8); movq %r10, (0 * 8)(%rsp); movq %r11, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); + CFI_REL_OFFSET(%r10, 0 * 8); + CFI_REL_OFFSET(%r11, 1 * 8); + CFI_REL_OFFSET(%r12, 2 * 8); + CFI_REL_OFFSET(%r13, 3 * 8); vmovdqu (%rdx), RTMP0x; @@ -975,6 +1005,10 @@ _gcry_twofish_avx2_ocb_auth: movq (1 * 8)(%rsp), %r11; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); call __twofish_enc_blk16; @@ -987,6 +1021,7 @@ _gcry_twofish_avx2_ocb_auth: vpxor RA1, RC1, RA1; addq $(4 * 8), %rsp; + CFI_ADJUST_CFA_OFFSET(-4 * 8); vpxor RA1, RA0, RTMP1; @@ -998,6 +1033,7 @@ _gcry_twofish_avx2_ocb_auth: vzeroall; ret; + CFI_ENDPROC(); ELF(.size _gcry_twofish_avx2_ocb_auth,.-_gcry_twofish_avx2_ocb_auth;) .align 16 diff --git a/cipher/whirlpool-sse2-amd64.S b/cipher/whirlpool-sse2-amd64.S index e98b831c..5631dc56 100644 --- a/cipher/whirlpool-sse2-amd64.S +++ b/cipher/whirlpool-sse2-amd64.S @@ -23,17 +23,7 @@ #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_WHIRLPOOL) -#ifdef __PIC__ -# define RIP %rip -#else -# define RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" .text @@ -173,16 +163,24 @@ _gcry_whirlpool_transform_amd64: * %rdx: nblks * %rcx: look-up tables */ + CFI_STARTPROC(); cmp $0, %rdx; je .Lskip; subq $STACK_MAX, %rsp; + CFI_ADJUST_CFA_OFFSET(STACK_MAX); movq %rbp, STACK_RBP(%rsp); movq %rbx, STACK_RBX(%rsp); movq %r12, STACK_R12(%rsp); movq %r13, STACK_R13(%rsp); movq %r14, STACK_R14(%rsp); movq %r15, STACK_R15(%rsp); + CFI_REL_OFFSET(%rbp, STACK_RBP); + CFI_REL_OFFSET(%rbx, STACK_RBX); + CFI_REL_OFFSET(%r12, STACK_R12); + CFI_REL_OFFSET(%r13, STACK_R13); + CFI_REL_OFFSET(%r14, STACK_R14); + CFI_REL_OFFSET(%r15, STACK_R15); movq %rdx, STACK_NBLKS(%rsp); movq %rdi, STACK_STATEP(%rsp); @@ -332,10 +330,18 @@ _gcry_whirlpool_transform_amd64: movq STACK_R13(%rsp), %r13; movq STACK_R14(%rsp), %r14; movq STACK_R15(%rsp), %r15; + CFI_RESTORE(%rbp); + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); addq $STACK_MAX, %rsp; + CFI_ADJUST_CFA_OFFSET(-STACK_MAX); .Lskip: movl $(STACK_MAX + 8), %eax; ret; + CFI_ENDPROC(); ELF(.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;) #endif |
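The pattern applied throughout the hunks above is mechanical: every push/pop of a callee-saved register is paired with CFI_PUSH/CFI_POP, every constant %rsp adjustment with CFI_ADJUST_CFA_OFFSET, every register spilled into a fixed-size frame with CFI_REL_OFFSET and later CFI_RESTORE, and functions that keep the pre-aligned %rsp in a spare register switch the CFA base with CFI_DEF_CFA_REGISTER. A minimal sketch of the fixed-frame case, assuming only the macros from cipher/asm-common-amd64.h (the function name and frame layout are made up for illustration, not taken from the patch):

/* Illustrative sketch only -- not part of the patch.  The function name and
 * frame layout are hypothetical; the CFI_* and ELF() helpers are the macros
 * provided by cipher/asm-common-amd64.h. */
#include "asm-common-amd64.h"

.text

.align 8
.globl _example_cfi_fixed_frame
ELF(.type _example_cfi_fixed_frame,@function;)
_example_cfi_fixed_frame:
	CFI_STARTPROC();
	pushq %rbx;
	CFI_PUSH(%rbx);			/* CFA offset grows by 8; %rbx is at CFA-16 */

	subq $(2 * 8), %rsp;		/* fixed-size spill area */
	CFI_ADJUST_CFA_OFFSET(2 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %r12, (1 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);	/* %rbp saved at %rsp + 0 */
	CFI_REL_OFFSET(%r12, 1 * 8);	/* %r12 saved at %rsp + 8 */

	xorl %eax, %eax;		/* function body would go here */

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %r12;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%r12);
	addq $(2 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-2 * 8);

	popq %rbx;
	CFI_POP(%rbx);
	ret;
	CFI_ENDPROC();
ELF(.size _example_cfi_fixed_frame,.-_example_cfi_fixed_frame;)

Functions whose out-of-line tail code sits after the epilogue (the AES key-length branches and the salsa20 128/192-byte tail above) additionally bracket the epilogue with CFI_REMEMBER_STATE()/CFI_RESTORE_STATE(), so the blocks placed below the ret are still described with the pre-epilogue unwind state.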
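The SHA-256/SHA-512 AVX2 implementations keep the original stack pointer in the realigned frame itself rather than in a register for the whole function, so a plain register-relative CFA rule cannot describe them; those hunks instead use the DWARF-expression helpers CFI_CFA_ON_STACK (the CFA is loaded from an %rsp-relative slot) and CFI_REG_ON_STACK (a callee-saved register lives at an %rsp-relative slot). A sketch of that shape, in the Intel syntax used by the SHA-2 files, with a made-up function name and frame offsets:

/* Illustrative sketch only -- not part of the patch.  Frame offsets and the
 * function name are hypothetical; the helpers come from
 * cipher/asm-common-amd64.h. */
#include "asm-common-amd64.h"

.intel_syntax noprefix

.text

.align 16
.globl _example_cfi_aligned_frame
ELF(.type _example_cfi_aligned_frame,@function;)
_example_cfi_aligned_frame:
	CFI_STARTPROC()
	mov	rax, rsp
	CFI_DEF_CFA_REGISTER(rax)
	sub	rsp, 80			/* 64-byte scratch area + two save slots */
	and	rsp, ~(0x40 - 1)	/* 64-byte align the frame */
	mov	[rsp + 64], rax		/* stash the original rsp in the frame */
	CFI_CFA_ON_STACK(64, 0)		/* CFA = *(rsp + 64) + 8 */
	mov	[rsp + 72], rbx		/* spill a callee-saved register */
	CFI_REG_ON_STACK(rbx, 72)	/* rbx lives at rsp + 72 */

	xor	eax, eax		/* function body would go here */

	mov	rbx, [rsp + 72]
	CFI_RESTORE(rbx)
	mov	rsp, [rsp + 64]		/* restore the original stack pointer */
	CFI_DEF_CFA_REGISTER(rsp)
	ret
	CFI_ENDPROC()
ELF(.size _example_cfi_aligned_frame,.-_example_cfi_aligned_frame;)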