author     Jussi Kivilinna <jussi.kivilinna@iki.fi>    2019-04-15 19:46:53 +0300
committer  Jussi Kivilinna <jussi.kivilinna@iki.fi>    2019-04-16 23:03:36 +0300
commit     d11ae95d05dc39ec6b825d1109afadd964589880 (patch)
tree       6a36256a9a816cd8e49fb6be5fcb4a9b6f9d007d /cipher/camellia-aesni-avx-amd64.S
parent     0903b215ef5a18332b740a24e6e2bfbed9e1d97b (diff)
download   libgcrypt-d11ae95d05dc39ec6b825d1109afadd964589880.tar.gz
Add CFI unwind assembly directives for AMD64 assembly
* configure.ac (gcry_cv_gcc_asm_cfi_directives): New.
* cipher/asm-common-amd64.h (ADD_RIP, CFI_STARTPROC, CFI_ENDPROC)
(CFI_REMEMBER_STATE, CFI_RESTORE_STATE, CFI_ADJUST_CFA_OFFSET)
(CFI_REL_OFFSET, CFI_DEF_CFA_REGISTER, CFI_REGISTER, CFI_RESTORE)
(CFI_PUSH, CFI_POP, CFI_POP_TMP_REG, CFI_LEAVE, DW_REGNO)
(DW_SLEB128_7BIT, DW_SLEB128_28BIT, CFI_CFA_ON_STACK)
(CFI_REG_ON_STACK): New.
(ENTER_SYSV_FUNCPARAMS_0_4, EXIT_SYSV_FUNC): Add CFI directives.
* cipher/arcfour-amd64.S: Add CFI directives.
* cipher/blake2b-amd64-avx2.S: Add CFI directives.
* cipher/blake2s-amd64-avx.S: Add CFI directives.
* cipher/blowfish-amd64.S: Add CFI directives.
* cipher/camellia-aesni-avx-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/camellia-aesni-avx2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/cast5-amd64.S: Add CFI directives.
* cipher/chacha20-amd64-avx2.S: Add CFI directives.
* cipher/chacha20-amd64-ssse3.S: Add CFI directives.
* cipher/des-amd64.S: Add CFI directives.
* cipher/rijndael-amd64.S: Add CFI directives.
* cipher/rijndael-ssse3-amd64-asm.S: Add CFI directives.
* cipher/salsa20-amd64.S: Add CFI directives; Use 'asm-common-amd64.h'.
* cipher/serpent-avx2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/serpent-sse2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha1-avx-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha1-avx-bmi2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha1-avx2-bmi2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha1-ssse3-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha256-avx-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha256-avx2-bmi2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha256-ssse3-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha512-avx-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha512-avx2-bmi2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/sha512-ssse3-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/twofish-amd64.S: Add CFI directives.
* cipher/twofish-avx2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* cipher/whirlpool-sse2-amd64.S: Add CFI directives; Use
'asm-common-amd64.h'.
* mpi/amd64/func_abi.h: Include 'config.h'.
(CFI_STARTPROC, CFI_ENDPROC, CFI_ADJUST_CFA_OFFSET, CFI_REL_OFFSET)
(CFI_RESTORE, CFI_PUSH, CFI_POP): New.
(FUNC_ENTRY, FUNC_EXIT): Add CFI directives.
--
This commit adds CFI directives that provide DWARF unwinding information,
so that debuggers can produce backtraces while execution is inside the
AMD64 assembly files.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
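
[Editor's note] For readers unfamiliar with these macros, here is a minimal
sketch of how the prologue/epilogue helpers named above can expand to GAS
.cfi_* directives. The authoritative definitions live in
'cipher/asm-common-amd64.h' and expand to nothing when the new configure
check finds no assembler CFI support; the sketch below is illustrative,
not the verbatim header.

    /* Hypothetical expansions -- see asm-common-amd64.h for the real ones. */
    #define CFI_STARTPROC()            .cfi_startproc
    #define CFI_ENDPROC()              .cfi_endproc
    #define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
    #define CFI_REL_OFFSET(reg, off)   .cfi_rel_offset reg, off
    #define CFI_DEF_CFA_REGISTER(reg)  .cfi_def_cfa_register reg
    #define CFI_RESTORE(reg)           .cfi_restore reg

    /* 'pushq' moves the CFA 8 bytes further from %rsp and stores the
     * register at offset 0 from the new %rsp: */
    #define CFI_PUSH(reg)  CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0)

    /* 'leave' pops the saved %rbp, so the CFA is back at %rsp + 8: */
    #define CFI_LEAVE()    CFI_ADJUST_CFA_OFFSET(-8); CFI_DEF_CFA_REGISTER(%rsp)

With these, the annotated prologue/epilogue pattern that recurs throughout
the diff below reads:

    	CFI_STARTPROC();
    	pushq %rbp;
    	CFI_PUSH(%rbp);
    	movq %rsp, %rbp;
    	CFI_DEF_CFA_REGISTER(%rbp);
    	...
    	leave;
    	CFI_LEAVE();
    	ret;
    	CFI_ENDPROC();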
Diffstat (limited to 'cipher/camellia-aesni-avx-amd64.S')
-rw-r--r--   cipher/camellia-aesni-avx-amd64.S   230
1 file changed, 143 insertions(+), 87 deletions(-)
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 8022934f..e16d4f61 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -24,17 +24,7 @@
 	 defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
 	defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
 
-#ifdef __PIC__
-# define RIP (%rip)
-#else
-# define RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
@@ -75,10 +65,10 @@
 	/* \
 	 * S-function with AES subbytes \
 	 */ \
-	vmovdqa .Linv_shift_row RIP, t4; \
-	vbroadcastss .L0f0f0f0f RIP, t7; \
-	vmovdqa .Lpre_tf_lo_s1 RIP, t0; \
-	vmovdqa .Lpre_tf_hi_s1 RIP, t1; \
+	vmovdqa .Linv_shift_row rRIP, t4; \
+	vbroadcastss .L0f0f0f0f rRIP, t7; \
+	vmovdqa .Lpre_tf_lo_s1 rRIP, t0; \
+	vmovdqa .Lpre_tf_hi_s1 rRIP, t1; \
 	\
 	/* AES inverse shift rows */ \
 	vpshufb t4, x0, x0; \
@@ -91,8 +81,8 @@
 	vpshufb t4, x6, x6; \
 	\
 	/* prefilter sboxes 1, 2 and 3 */ \
-	vmovdqa .Lpre_tf_lo_s4 RIP, t2; \
-	vmovdqa .Lpre_tf_hi_s4 RIP, t3; \
+	vmovdqa .Lpre_tf_lo_s4 rRIP, t2; \
+	vmovdqa .Lpre_tf_hi_s4 rRIP, t3; \
 	filter_8bit(x0, t0, t1, t7, t6); \
 	filter_8bit(x7, t0, t1, t7, t6); \
 	filter_8bit(x1, t0, t1, t7, t6); \
@@ -106,8 +96,8 @@
 	filter_8bit(x6, t2, t3, t7, t6); \
 	\
 	/* AES subbytes + AES shift rows */ \
-	vmovdqa .Lpost_tf_lo_s1 RIP, t0; \
-	vmovdqa .Lpost_tf_hi_s1 RIP, t1; \
+	vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+	vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
 	vaesenclast t4, x0, x0; \
 	vaesenclast t4, x7, x7; \
 	vaesenclast t4, x1, x1; \
@@ -118,16 +108,16 @@
 	vaesenclast t4, x6, x6; \
 	\
 	/* postfilter sboxes 1 and 4 */ \
-	vmovdqa .Lpost_tf_lo_s3 RIP, t2; \
-	vmovdqa .Lpost_tf_hi_s3 RIP, t3; \
+	vmovdqa .Lpost_tf_lo_s3 rRIP, t2; \
+	vmovdqa .Lpost_tf_hi_s3 rRIP, t3; \
 	filter_8bit(x0, t0, t1, t7, t6); \
 	filter_8bit(x7, t0, t1, t7, t6); \
 	filter_8bit(x3, t0, t1, t7, t6); \
 	filter_8bit(x6, t0, t1, t7, t6); \
 	\
 	/* postfilter sbox 3 */ \
-	vmovdqa .Lpost_tf_lo_s2 RIP, t4; \
-	vmovdqa .Lpost_tf_hi_s2 RIP, t5; \
+	vmovdqa .Lpost_tf_lo_s2 rRIP, t4; \
+	vmovdqa .Lpost_tf_hi_s2 rRIP, t5; \
 	filter_8bit(x2, t2, t3, t7, t6); \
 	filter_8bit(x5, t2, t3, t7, t6); \
 	\
@@ -442,7 +432,7 @@
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vmovdqu .Lshufb_16x16b RIP, a0; \
+	vmovdqu .Lshufb_16x16b rRIP, a0; \
 	vmovdqu st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -508,7 +498,7 @@
 	vpunpcklwd t1, t3, e; \
 	vpunpckhwd t1, t3, f; \
 	\
-	vmovdqa .Ltranspose_8x8_shuf RIP, t3; \
+	vmovdqa .Ltranspose_8x8_shuf rRIP, t3; \
 	\
 	vpunpcklwd g, c, d; \
 	vpunpckhwd g, c, c; \
@@ -540,7 +530,7 @@
 #define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
 		     y6, y7, rio, key) \
 	vmovq key, x0; \
-	vpshufb .Lpack_bswap RIP, x0, x0; \
+	vpshufb .Lpack_bswap rRIP, x0, x0; \
 	\
 	vpxor 0 * 16(rio), x0, y7; \
 	vpxor 1 * 16(rio), x0, y6; \
@@ -591,7 +581,7 @@
 	vmovdqu x0, stack_tmp0; \
 	\
 	vmovq key, x0; \
-	vpshufb .Lpack_bswap RIP, x0, x0; \
+	vpshufb .Lpack_bswap rRIP, x0, x0; \
 	\
 	vpxor x0, y7, y7; \
 	vpxor x0, y6, y6; \
@@ -786,6 +776,7 @@ __camellia_enc_blk16:
 	 *	%xmm0..%xmm15: 16 encrypted blocks, order swapped:
 	 *	 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
 	 */
+	CFI_STARTPROC();
 
 	leaq 8 * 16(%rax), %rcx;
@@ -859,6 +850,7 @@ __camellia_enc_blk16:
 		     %xmm15, %rax, %rcx, 24);
 
 	jmp .Lenc_done;
+	CFI_ENDPROC();
 ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
 
 .align 8
@@ -874,6 +866,7 @@ __camellia_dec_blk16:
 	 *	%xmm0..%xmm15: 16 plaintext blocks, order swapped:
 	 *	 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
 	 */
+	CFI_STARTPROC();
 
 	leaq 8 * 16(%rax), %rcx;
@@ -944,6 +937,7 @@ __camellia_dec_blk16:
 		     ((key_table + (24) * 8) + 4)(CTX));
 
 	jmp .Ldec_max24;
+	CFI_ENDPROC();
 ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
 
 #define inc_le128(x, minus_one, tmp) \
@@ -963,9 +957,12 @@ _gcry_camellia_aesni_avx_ctr_enc:
 	 *	%rdx: src (16 blocks)
 	 *	%rcx: iv (big endian, 128bit)
 	 */
+	CFI_STARTPROC();
 
 	pushq %rbp;
+	CFI_PUSH(%rbp);
 	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
 
 	vzeroupper;
@@ -973,7 +970,7 @@ _gcry_camellia_aesni_avx_ctr_enc:
 	andq $~31, %rsp;
 	movq %rsp, %rax;
 
-	vmovdqa .Lbswap128_mask RIP, %xmm14;
+	vmovdqa .Lbswap128_mask rRIP, %xmm14;
 
 	/* load IV and byteswap */
 	vmovdqu (%rcx), %xmm15;
@@ -1018,12 +1015,12 @@ _gcry_camellia_aesni_avx_ctr_enc:
 	vmovdqa %xmm0, %xmm13;
 	vpshufb %xmm14, %xmm0, %xmm0;
 	inc_le128(%xmm13, %xmm15, %xmm14);
-	vpshufb .Lbswap128_mask RIP, %xmm13, %xmm13; /* le => be */
+	vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; /* le => be */
 	vmovdqu %xmm13, (%rcx);
 
 	/* inpack16_pre: */
 	vmovq (key_table)(CTX), %xmm15;
-	vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+	vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
 	vpxor %xmm0, %xmm15, %xmm0;
 	vpxor %xmm1, %xmm15, %xmm1;
 	vpxor %xmm2, %xmm15, %xmm2;
@@ -1067,7 +1064,9 @@ _gcry_camellia_aesni_avx_ctr_enc:
 	vzeroall;
 
 	leave;
+	CFI_LEAVE();
 	ret;
+	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
 
 .align 8
@@ -1081,9 +1080,12 @@ _gcry_camellia_aesni_avx_cbc_dec:
 	 *	%rdx: src (16 blocks)
 	 *	%rcx: iv
 	 */
+	CFI_STARTPROC();
 
 	pushq %rbp;
+	CFI_PUSH(%rbp);
 	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
 
 	vzeroupper;
@@ -1135,7 +1137,9 @@ _gcry_camellia_aesni_avx_cbc_dec:
 	vzeroall;
 
 	leave;
+	CFI_LEAVE();
 	ret;
+	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
 
 .align 8
@@ -1149,9 +1153,12 @@ _gcry_camellia_aesni_avx_cfb_dec:
 	 *	%rdx: src (16 blocks)
 	 *	%rcx: iv
 	 */
+	CFI_STARTPROC();
 
 	pushq %rbp;
+	CFI_PUSH(%rbp);
 	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
 
 	vzeroupper;
@@ -1161,7 +1168,7 @@ _gcry_camellia_aesni_avx_cfb_dec:
 	/* inpack16_pre: */
 	vmovq (key_table)(CTX), %xmm0;
-	vpshufb .Lpack_bswap RIP, %xmm0, %xmm0;
+	vpshufb .Lpack_bswap rRIP, %xmm0, %xmm0;
 	vpxor (%rcx), %xmm0, %xmm15;
 	vmovdqu 15 * 16(%rdx), %xmm1;
 	vmovdqu %xmm1, (%rcx); /* store new IV */
@@ -1207,7 +1214,9 @@ _gcry_camellia_aesni_avx_cfb_dec:
 	vzeroall;
 
 	leave;
+	CFI_LEAVE();
 	ret;
+	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
 
 .align 8
@@ -1223,9 +1232,12 @@ _gcry_camellia_aesni_avx_ocb_enc:
 	 *	%r8 : checksum
 	 *	%r9 : L pointers (void *L[16])
 	 */
+	CFI_STARTPROC();
 
 	pushq %rbp;
+	CFI_PUSH(%rbp);
 	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
 
 	vzeroupper;
@@ -1233,10 +1245,14 @@ _gcry_camellia_aesni_avx_ocb_enc:
 	andq $~31, %rsp;
 	movq %rsp, %rax;
 
-	movq %r10, (16 * 16 + 0 * 8)(%rax);
-	movq %r11, (16 * 16 + 1 * 8)(%rax);
-	movq %r12, (16 * 16 + 2 * 8)(%rax);
-	movq %r13, (16 * 16 + 3 * 8)(%rax);
+	movq %r10, (16 * 16 + 0 * 8)(%rsp);
+	movq %r11, (16 * 16 + 1 * 8)(%rsp);
+	movq %r12, (16 * 16 + 2 * 8)(%rsp);
+	movq %r13, (16 * 16 + 3 * 8)(%rsp);
+	CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+	CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+	CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+	CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
 
 	vmovdqu (%rcx), %xmm14;
 	vmovdqu (%r8), %xmm15;
@@ -1292,7 +1308,7 @@ _gcry_camellia_aesni_avx_ocb_enc:
 	/* inpack16_pre: */
 	vmovq (key_table)(CTX), %xmm15;
-	vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+	vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
 	vpxor %xmm0, %xmm15, %xmm0;
 	vpxor %xmm1, %xmm15, %xmm1;
 	vpxor %xmm2, %xmm15, %xmm2;
@@ -1335,13 +1351,19 @@ _gcry_camellia_aesni_avx_ocb_enc:
 	vzeroall;
 
-	movq (16 * 16 + 0 * 8)(%rax), %r10;
-	movq (16 * 16 + 1 * 8)(%rax), %r11;
-	movq (16 * 16 + 2 * 8)(%rax), %r12;
-	movq (16 * 16 + 3 * 8)(%rax), %r13;
+	movq (16 * 16 + 0 * 8)(%rsp), %r10;
+	movq (16 * 16 + 1 * 8)(%rsp), %r11;
+	movq (16 * 16 + 2 * 8)(%rsp), %r12;
+	movq (16 * 16 + 3 * 8)(%rsp), %r13;
+	CFI_RESTORE(%r10);
+	CFI_RESTORE(%r11);
+	CFI_RESTORE(%r12);
+	CFI_RESTORE(%r13);
 
 	leave;
+	CFI_LEAVE();
 	ret;
+	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;)
 
 .align 8
@@ -1357,9 +1379,12 @@ _gcry_camellia_aesni_avx_ocb_dec:
 	 *	%r8 : checksum
 	 *	%r9 : L pointers (void *L[16])
 	 */
+	CFI_STARTPROC();
 
 	pushq %rbp;
+	CFI_PUSH(%rbp);
 	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
 
 	vzeroupper;
@@ -1367,10 +1392,14 @@ _gcry_camellia_aesni_avx_ocb_dec:
 	andq $~31, %rsp;
 	movq %rsp, %rax;
 
-	movq %r10, (16 * 16 + 0 * 8)(%rax);
-	movq %r11, (16 * 16 + 1 * 8)(%rax);
-	movq %r12, (16 * 16 + 2 * 8)(%rax);
-	movq %r13, (16 * 16 + 3 * 8)(%rax);
+	movq %r10, (16 * 16 + 0 * 8)(%rsp);
+	movq %r11, (16 * 16 + 1 * 8)(%rsp);
+	movq %r12, (16 * 16 + 2 * 8)(%rsp);
+	movq %r13, (16 * 16 + 3 * 8)(%rsp);
+	CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+	CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+	CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+	CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
 
 	vmovdqu (%rcx), %xmm15;
@@ -1428,7 +1457,7 @@ _gcry_camellia_aesni_avx_ocb_dec:
 	/* inpack16_pre: */
 	vmovq (key_table)(CTX, %r8, 8), %xmm15;
-	vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+	vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
 	vpxor %xmm0, %xmm15, %xmm0;
 	vpxor %xmm1, %xmm15, %xmm1;
 	vpxor %xmm2, %xmm15, %xmm2;
@@ -1493,13 +1522,19 @@ _gcry_camellia_aesni_avx_ocb_dec:
 	vzeroall;
 
-	movq (16 * 16 + 0 * 8)(%rax), %r10;
-	movq (16 * 16 + 1 * 8)(%rax), %r11;
-	movq (16 * 16 + 2 * 8)(%rax), %r12;
-	movq (16 * 16 + 3 * 8)(%rax), %r13;
+	movq (16 * 16 + 0 * 8)(%rsp), %r10;
+	movq (16 * 16 + 1 * 8)(%rsp), %r11;
+	movq (16 * 16 + 2 * 8)(%rsp), %r12;
+	movq (16 * 16 + 3 * 8)(%rsp), %r13;
+	CFI_RESTORE(%r10);
+	CFI_RESTORE(%r11);
+	CFI_RESTORE(%r12);
+	CFI_RESTORE(%r13);
 
 	leave;
+	CFI_LEAVE();
 	ret;
+	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;)
 
 .align 8
@@ -1514,9 +1549,12 @@ _gcry_camellia_aesni_avx_ocb_auth:
 	 *	%rcx: checksum
 	 *	%r8 : L pointers (void *L[16])
 	 */
+	CFI_STARTPROC();
 
 	pushq %rbp;
+	CFI_PUSH(%rbp);
 	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
 
 	vzeroupper;
@@ -1524,10 +1562,14 @@ _gcry_camellia_aesni_avx_ocb_auth:
 	andq $~31, %rsp;
 	movq %rsp, %rax;
 
-	movq %r10, (16 * 16 + 0 * 8)(%rax);
-	movq %r11, (16 * 16 + 1 * 8)(%rax);
-	movq %r12, (16 * 16 + 2 * 8)(%rax);
-	movq %r13, (16 * 16 + 3 * 8)(%rax);
+	movq %r10, (16 * 16 + 0 * 8)(%rsp);
+	movq %r11, (16 * 16 + 1 * 8)(%rsp);
+	movq %r12, (16 * 16 + 2 * 8)(%rsp);
+	movq %r13, (16 * 16 + 3 * 8)(%rsp);
+	CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+	CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+	CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+	CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
 
 	vmovdqu (%rdx), %xmm15;
@@ -1580,7 +1622,7 @@ _gcry_camellia_aesni_avx_ocb_auth:
 	/* inpack16_pre: */
 	vmovq (key_table)(CTX), %xmm15;
-	vpshufb .Lpack_bswap RIP, %xmm15, %xmm15;
+	vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
 	vpxor %xmm0, %xmm15, %xmm0;
 	vpxor %xmm1, %xmm15, %xmm1;
 	vpxor %xmm2, %xmm15, %xmm2;
@@ -1623,13 +1665,19 @@ _gcry_camellia_aesni_avx_ocb_auth:
 	vzeroall;
 
-	movq (16 * 16 + 0 * 8)(%rax), %r10;
-	movq (16 * 16 + 1 * 8)(%rax), %r11;
-	movq (16 * 16 + 2 * 8)(%rax), %r12;
-	movq (16 * 16 + 3 * 8)(%rax), %r13;
+	movq (16 * 16 + 0 * 8)(%rsp), %r10;
+	movq (16 * 16 + 1 * 8)(%rsp), %r11;
+	movq (16 * 16 + 2 * 8)(%rsp), %r12;
+	movq (16 * 16 + 3 * 8)(%rsp), %r13;
+	CFI_RESTORE(%r10);
+	CFI_RESTORE(%r11);
+	CFI_RESTORE(%r12);
+	CFI_RESTORE(%r13);
 
 	leave;
+	CFI_LEAVE();
 	ret;
+	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;)
 
 /*
@@ -1657,8 +1705,8 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
 	vpand sbox4mask, t0, t0; \
 	vpor t0, x, x; \
 	\
-	vmovdqa .Lpost_tf_lo_s1 RIP, t0; \
-	vmovdqa .Lpost_tf_hi_s1 RIP, t1; \
+	vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+	vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
 	\
 	/* prefilter sboxes */ \
 	filter_8bit(x, pre_s1lo_mask, pre_s1hi_mask, _0f0f0f0fmask, t2); \
@@ -1672,18 +1720,18 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
 	/* output rotation for sbox2 (<<< 1) */ \
 	/* output rotation for sbox3 (>>> 1) */ \
 	vpshufb inv_shift_row, x, t1; \
-	vpshufb .Lsp0044440444044404mask RIP, x, t4; \
-	vpshufb .Lsp1110111010011110mask RIP, x, x; \
+	vpshufb .Lsp0044440444044404mask rRIP, x, t4; \
+	vpshufb .Lsp1110111010011110mask rRIP, x, x; \
 	vpaddb t1, t1, t2; \
 	vpsrlw $7, t1, t0; \
 	vpsllw $7, t1, t3; \
 	vpor t0, t2, t0; \
 	vpsrlw $1, t1, t1; \
-	vpshufb .Lsp0222022222000222mask RIP, t0, t0; \
+	vpshufb .Lsp0222022222000222mask rRIP, t0, t0; \
 	vpor t1, t3, t1; \
 	\
 	vpxor x, t4, t4; \
-	vpshufb .Lsp3033303303303033mask RIP, t1, t1; \
+	vpshufb .Lsp3033303303303033mask rRIP, t1, t1; \
 	vpxor t4, t0, t0; \
 	vpxor t1, t0, t0; \
 	vpsrldq $8, t0, x; \
@@ -1741,17 +1789,19 @@ __camellia_avx_setup128:
 	 *	%rdi: ctx, CTX; subkey storage at key_table(CTX)
 	 *	%xmm0: key
 	 */
+	CFI_STARTPROC();
+
 #define cmll_sub(n, ctx) (key_table+((n)*8))(ctx)
 #define KL128 %xmm0
 #define KA128 %xmm2
 
-	vpshufb .Lbswap128_mask RIP, KL128, KL128;
+	vpshufb .Lbswap128_mask rRIP, KL128, KL128;
 
-	vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11;
-	vmovq .Lsbox4_input_mask RIP, %xmm12;
-	vbroadcastss .L0f0f0f0f RIP, %xmm13;
-	vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14;
-	vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15;
+	vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+	vmovq .Lsbox4_input_mask rRIP, %xmm12;
+	vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+	vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+	vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
 
 	/*
 	 * Generate KA
@@ -1763,18 +1813,18 @@ __camellia_avx_setup128:
 	camellia_f(%xmm2, %xmm4, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
 	vpxor %xmm4, %xmm3, %xmm3;
 	camellia_f(%xmm3, %xmm2, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
 	camellia_f(%xmm2, %xmm3, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
 	vpxor %xmm4, %xmm3, %xmm3;
 	camellia_f(%xmm3, %xmm4, %xmm1, %xmm5, %xmm6, %xmm7, %xmm8,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
 
 	vpslldq $8, %xmm3, %xmm3;
 	vpxor %xmm4, %xmm2, %xmm2;
@@ -2076,6 +2126,7 @@ __camellia_avx_setup128:
 	vzeroall;
 
 	ret;
+	CFI_ENDPROC();
 ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
 
 .align 8
@@ -2086,19 +2137,21 @@ __camellia_avx_setup256:
 	 *	%rdi: ctx, CTX; subkey storage at key_table(CTX)
 	 *	%xmm0 & %xmm1: key
 	 */
+	CFI_STARTPROC();
+
 #define KL128 %xmm0
 #define KR128 %xmm1
 #define KA128 %xmm2
 #define KB128 %xmm3
 
-	vpshufb .Lbswap128_mask RIP, KL128, KL128;
-	vpshufb .Lbswap128_mask RIP, KR128, KR128;
+	vpshufb .Lbswap128_mask rRIP, KL128, KL128;
+	vpshufb .Lbswap128_mask rRIP, KR128, KR128;
 
-	vmovdqa .Linv_shift_row_and_unpcklbw RIP, %xmm11;
-	vmovq .Lsbox4_input_mask RIP, %xmm12;
-	vbroadcastss .L0f0f0f0f RIP, %xmm13;
-	vmovdqa .Lpre_tf_lo_s1 RIP, %xmm14;
-	vmovdqa .Lpre_tf_hi_s1 RIP, %xmm15;
+	vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+	vmovq .Lsbox4_input_mask rRIP, %xmm12;
+	vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+	vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+	vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
 
 	/*
 	 * Generate KA
@@ -2111,20 +2164,20 @@ __camellia_avx_setup256:
 	camellia_f(%xmm2, %xmm4, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
 	vpxor %xmm4, %xmm3, %xmm3;
 	camellia_f(%xmm3, %xmm2, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
 	vpxor %xmm6, %xmm2, %xmm2;
 	camellia_f(%xmm2, %xmm3, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
 	vpxor %xmm4, %xmm3, %xmm3;
 	vpxor KR128, %xmm3, %xmm3;
 	camellia_f(%xmm3, %xmm4, %xmm5, %xmm7, %xmm8, %xmm9, %xmm10,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
 
 	vpslldq $8, %xmm3, %xmm3;
 	vpxor %xmm4, %xmm2, %xmm2;
@@ -2142,12 +2195,12 @@ __camellia_avx_setup256:
 	camellia_f(%xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 rRIP);
 	vpxor %xmm5, %xmm3, %xmm3;
 	camellia_f(%xmm3, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10,
-		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 RIP);
+		   %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 rRIP);
 
 	vpslldq $8, %xmm3, %xmm3;
 	vpxor %xmm5, %xmm4, %xmm4;
 	vpsrldq $8, %xmm3, %xmm3;
@@ -2553,6 +2606,7 @@ __camellia_avx_setup256:
 	vzeroall;
 
 	ret;
+	CFI_ENDPROC();
 ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
 
 .align 8
@@ -2565,6 +2619,7 @@ _gcry_camellia_aesni_avx_keygen:
 	 *	%rsi: key
 	 *	%rdx: keylen
 	 */
+	CFI_STARTPROC();
 
 	vzeroupper;
@@ -2585,6 +2640,7 @@ _gcry_camellia_aesni_avx_keygen:
 	vpor %xmm2, %xmm1, %xmm1;
 
 	jmp __camellia_avx_setup256;
+	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;)
 
 #endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
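
[Editor's note] A word on CFI_REG_ON_STACK, which the OCB paths above use:
those functions align the stack with 'andq $~31, %rsp' after setting up a
%rbp frame, so the slots holding %r10..%r13 sit at %rsp-relative offsets
with no fixed distance from the CFA. Plain .cfi_rel_offset cannot describe
such a slot, but a DWARF expression can. The following is a hypothetical
sketch of how a macro like CFI_REG_ON_STACK can be built from .cfi_escape;
the real definition, including the DW_REGNO mapping from names such as
'r10' to DWARF register numbers, is in 'cipher/asm-common-amd64.h'.

    /* Hedged sketch, not the verbatim libgcrypt definition.
     * DW_CFA_expression (0x10): the previous value of register <regno>
     * is stored at the address computed by the DWARF expression that
     * follows.  DW_OP_breg7 (0x77) means "%rsp + SLEB128 offset", since
     * %rsp is DWARF register 7 on AMD64. */

    /* Fixed-width 4-byte SLEB128 encoding of a small positive offset: */
    #define DW_SLEB128_28BIT(v) \
    	0x80|((v)&0x7f), \
    	0x80|(((v)>>7)&0x7f), \
    	0x80|(((v)>>14)&0x7f), \
    	0x00|(((v)>>21)&0x7f)

    #define CFI_REG_ON_STACK(dw_regno, rsp_offs) \
    	.cfi_escape 0x10, /* DW_CFA_expression */ \
    		    dw_regno, \
    		    5, /* length of the expression block */ \
    		    0x77, /* DW_OP_breg7: %rsp + offset */ \
    		    DW_SLEB128_28BIT(rsp_offs)

    /* e.g. %r12 (DWARF regno 12) saved at %rsp + 16 * 16 + 2 * 8: */
    /* CFI_REG_ON_STACK(12, 16 * 16 + 2 * 8); */

CFI_CFA_ON_STACK from the commit message applies the same idea to the CFA
itself, using DW_CFA_def_cfa_expression (opcode 0x0f) instead.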