Diffstat (limited to 'FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S')
-rw-r--r--  FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S  22653
1 file changed, 22653 insertions, 0 deletions
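The added file is machine-generated x86-64 assembly implementing the wolfSSL SHA-256 block transform with AVX1 (and, when NO_AVX2_SUPPORT is unset, AVX2) instructions. In the unrolled `rnd_0`/`rnd_1` blocks of the listing below, the working variables a..h live in %r8d-%r15d, and the `rorl $14/$5/$6` and `rorl $9/$11/$2` chains are factored evaluations of the SHA-256 Sigma1 and Sigma0 rotations. As a reading aid, here is a minimal scalar C sketch of one round; the names (rotr32, sha256_round) are illustrative only, not wolfSSL's internal API:

    #include <stdint.h>

    /* Rotate right, 0 < n < 32. */
    static uint32_t rotr32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* One SHA-256 round. s[0..7] = a..h; w_k = W[i] + K[i], the per-round
     * word the assembly precomputes with vpaddd (set_w_k_xfer_4) and
     * spills to the stack. */
    static void sha256_round(uint32_t s[8], uint32_t w_k)
    {
        uint32_t S1  = rotr32(s[4], 6) ^ rotr32(s[4], 11) ^ rotr32(s[4], 25);
        uint32_t ch  = (s[4] & s[5]) ^ (~s[4] & s[6]);
        uint32_t t1  = s[7] + S1 + ch + w_k;
        uint32_t S0  = rotr32(s[0], 2) ^ rotr32(s[0], 13) ^ rotr32(s[0], 22);
        uint32_t maj = (s[0] & s[1]) ^ (s[0] & s[2]) ^ (s[1] & s[2]);
        uint32_t t2  = S0 + maj;

        /* Rotate the working variables: e gets d + t1, a gets t1 + t2. */
        s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + t1;
        s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = t1 + t2;
    }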
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S
new file mode 100644
index 000000000..c433d341c
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S
@@ -0,0 +1,22653 @@
+/* sha256_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#define HAVE_INTEL_AVX2
+#endif /* NO_AVX2_SUPPORT */
+
+#ifdef HAVE_INTEL_AVX1
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+L_avx1_sha256_k:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha256_shuf_00BA:
+.quad 0xb0a090803020100, 0xffffffffffffffff
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha256_shuf_DC00:
+.quad 0xffffffffffffffff, 0xb0a090803020100
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha256_flip_mask:
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX1
+.type Transform_Sha256_AVX1,@function
+.align 4
+Transform_Sha256_AVX1:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX1
+.p2align 2
+_Transform_Sha256_AVX1:
+#endif /* __APPLE__ */
+        pushq %rbx
+        pushq %r12
+        pushq %r13
+        pushq %r14
+        pushq %r15
+        subq $0x40, %rsp
+        leaq 32(%rdi), %rax
+        vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13
+        vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11
+        vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12
+        movl (%rdi), %r8d
+        movl 4(%rdi), %r9d
+        movl 8(%rdi), %r10d
+        movl 12(%rdi), %r11d
+        movl 16(%rdi), %r12d
+        movl 20(%rdi), %r13d
+        movl 24(%rdi), %r14d
+        movl 28(%rdi), %r15d
+        # X0, X1, X2, X3 = W[0..15]
+        vmovdqu (%rax), %xmm0
+        vmovdqu 16(%rax), %xmm1
+        vpshufb %xmm13, %xmm0, %xmm0
+        vpshufb %xmm13, %xmm1, %xmm1
+        vmovdqu 32(%rax), %xmm2
+        vmovdqu 48(%rax), %xmm3
+        vpshufb %xmm13, %xmm2, %xmm2
+        vpshufb %xmm13, %xmm3, %xmm3
+        movl %r9d, %ebx
+        movl %r12d, %edx
+        xorl %r10d, %ebx
+        # set_w_k_xfer_4: 0
+        vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+        vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+        vmovdqu %xmm4, (%rsp)
+        vmovdqu %xmm5, 16(%rsp)
+        vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+        vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+        vmovdqu %xmm6, 32(%rsp)
+        vmovdqu %xmm7, 48(%rsp)
+        # msg_sched: 0-3
+        # rnd_0: 0 - 0
+        rorl $14, %edx
+        vpalignr $4, %xmm0, %xmm1, %xmm5
+        vpalignr $4, %xmm2, %xmm3, %xmm4
+        # rnd_0: 1 - 2
+        movl %r9d, %eax
+        movl %r13d, %ecx
+        addl (%rsp), %r15d
+        xorl %r14d, %ecx
+        xorl %r12d, %edx
+        andl %r12d, %ecx
+        vpsrld $7, %xmm5, %xmm6
+        vpslld $25, %xmm5, %xmm7
+        # rnd_0: 3 - 4
+        rorl $5, %edx
+        xorl %r14d, %ecx
+        xorl %r12d, %edx
+        addl %ecx, %r15d
+        rorl $6, %edx
+        xorl %r8d, %eax
+        addl %edx, %r15d
+        movl %r8d, %ecx
+        vpsrld $18, %xmm5, %xmm8
+        vpslld $14, %xmm5, %xmm9
+        # rnd_0: 5 - 6
+        andl %eax, %ebx
+        rorl $9, %ecx
+        xorl %r8d, %ecx
+        xorl %r9d, %ebx
+        rorl $11, %ecx
+        addl %r15d, %r11d
+        xorl %r8d, %ecx
+        addl %ebx, %r15d
+        vpor %xmm6, %xmm7, %xmm6
+        vpor %xmm8, %xmm9, %xmm8
+        # rnd_0: 7 - 7
+        rorl $2, %ecx
+        movl %r11d, %edx
+        addl %ecx, %r15d
+        # rnd_1: 0 - 1
+        rorl $14, %edx
+        movl %r8d, %ebx
+        movl %r12d, %ecx
+        addl 4(%rsp), %r14d
+        xorl %r13d, %ecx
+        vpsrld $3, %xmm5, %xmm9
+        vpxor %xmm6, %xmm8, %xmm6
+        # rnd_1: 2 - 3
+        xorl %r11d, %edx
+        andl %r11d, %ecx
+        rorl $5, %edx
+        xorl %r13d, %ecx
+        xorl %r11d, %edx
+        addl %ecx, %r14d
+        vpxor %xmm6, %xmm9, %xmm5
+        vpshufd $0xfa, %xmm3, %xmm6
+        # rnd_1: 4 - 5
+        rorl $6, %edx
+        xorl %r15d, %ebx
+        addl %edx, %r14d
+        movl %r15d, %ecx
+        andl %ebx, %eax
+        rorl $9, %ecx
+        xorl %r15d, %ecx
+        xorl %r8d, %eax
+        vpsrld $10, %xmm6, %xmm8
+        vpsrlq $19, %xmm6, %xmm7
+        # rnd_1: 6 - 7
+        rorl $11, %ecx
+        addl %r14d, %r10d
+        xorl %r15d, %ecx
+        addl %eax, %r14d
+        rorl $2, %ecx
+        movl %r10d, %edx
+        addl %ecx, %r14d
+        # rnd_0: 0 - 0
+        rorl $14, %edx
+        vpsrlq $0x11, %xmm6, %xmm6
+        vpaddd %xmm0, %xmm4, %xmm4
+        # rnd_0: 1 - 3
+        movl %r15d, %eax
+        movl %r11d, %ecx
+        addl 8(%rsp), %r13d
+        xorl %r12d, %ecx
+        xorl %r10d, %edx
+        andl %r10d, %ecx
+        rorl $5, %edx
+        xorl %r12d, %ecx
+        xorl %r10d, %edx
+        addl %ecx, %r13d
+        vpxor %xmm6, %xmm7, %xmm6
+        vpaddd %xmm5, %xmm4, %xmm4
+        # rnd_0: 4 - 4
+        rorl $6, %edx
+        xorl %r14d, %eax
+        addl %edx, %r13d
+        movl %r14d, %ecx
+        vpxor %xmm6, %xmm8, %xmm8
+        # rnd_0: 5 - 5
+        andl %eax, %ebx
+        rorl $9, %ecx
+        xorl %r14d, %ecx
+        xorl %r15d, %ebx
+        vpshufb %xmm11, %xmm8, %xmm8
+        # rnd_0: 6 - 6
+        rorl $11, %ecx
+        addl %r13d, %r9d
+        xorl %r14d, %ecx
+        addl %ebx, %r13d
+        vpaddd %xmm8, %xmm4, %xmm4
+        # rnd_0: 7 - 7
+        rorl $2, %ecx
+        movl %r9d, %edx
+        addl %ecx, %r13d
+        # rnd_1: 0 - 0
+        rorl $14, %edx
+        vpshufd $0x50, %xmm4, %xmm6
+        # rnd_1: 1 - 1
+        movl %r14d, %ebx
+        movl %r10d, %ecx
+        addl 12(%rsp), %r12d
+        xorl %r11d, %ecx
+        vpsrlq $0x11, %xmm6, %xmm8
+        vpsrlq $19, %xmm6, %xmm7
+        # rnd_1: 2 - 3
+        xorl %r9d, %edx
+        andl %r9d, %ecx
+        rorl $5, %edx
+        xorl %r11d, %ecx
+        xorl %r9d, %edx
+        addl %ecx, %r12d
+        vpsrld $10, %xmm6, %xmm9
+        vpxor %xmm8, %xmm7, %xmm8
+        # rnd_1: 4 - 5
+        rorl $6, %edx
+        xorl %r13d, %ebx
+        addl %edx, %r12d
+        movl %r13d, %ecx
+        andl %ebx, %eax
+        rorl $9, %ecx
+        xorl %r13d, %ecx
+        xorl %r14d, %eax
+        vpxor %xmm9, %xmm8, %xmm9
+        # rnd_1: 6 - 6
+        rorl $11, %ecx
addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl 
%r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + 
rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor 
%xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 
+ vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, 
%edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, 
%ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, 
%ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq 
$0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, 
%xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 
16(%rsp) + vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # rnd_all_4: 0-3 + addl (%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 8(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 12(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 1-4 + addl 16(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 24(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 28(%rsp), 
%r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 2-5 + addl 32(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 40(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 3-6 + addl 48(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 56(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl 
%r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl %r8d, (%rdi) + addl %r9d, 4(%rdi) + addl %r10d, 8(%rdi) + addl %r11d, 12(%rdi) + addl %r12d, 16(%rdi) + addl %r13d, 20(%rdi) + addl %r14d, 24(%rdi) + addl %r15d, 28(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX1_Len +.type Transform_Sha256_AVX1_Len,@function +.align 4 +Transform_Sha256_AVX1_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_Len +.p2align 2 +_Transform_Sha256_AVX1_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + movq %rdx, %rsi + subq $0x40, %rsp + vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11 + vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # Start of loop processing a block +L_sha256_len_avx1_start: + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rbp), %xmm0 + vmovdqu 16(%rbp), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vmovdqu 32(%rbp), %xmm2 + vmovdqu 48(%rbp), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 
- 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl 
$11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d 
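
The interleaved scalar instructions in these msg_sched/rnd blocks are the standard SHA-256 compression rounds. The rorl/xorl chains on %edx and %ecx fold each big sigma into three rotates: ROTR6(ROTR5(ROTR14(e) ^ e) ^ e) expands to ROTR6(e) ^ ROTR11(e) ^ ROTR25(e) = Sigma1(e), and the $9/$11/$2 chain likewise yields Sigma0(a); Ch and Maj each cost a single andl by using their xor forms. A minimal reference sketch in C of what one round computes (sha256_round and the s[] working-state layout are illustrative, not wolfSSL's API; wk is the precomputed W[t]+K[t] word that the addl off(%rsp) pulls from the stack):

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round, plain C reference of the interleaved asm above. */
static void sha256_round(uint32_t s[8], uint32_t wk)
{
    uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
    uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

    uint32_t S1  = rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25);
    uint32_t ch  = (e & (f ^ g)) ^ g;        /* Ch(e,f,g), one AND  */
    uint32_t t1  = h + S1 + ch + wk;         /* wk = W[t] + K[t]    */
    uint32_t S0  = rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22);
    uint32_t maj = ((a ^ b) & (b ^ c)) ^ b;  /* Maj(a,b,c), one AND */
    uint32_t t2  = S0 + maj;

    s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
    s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + t2;
}

Note that the generated code never actually shuffles the eight values: it keeps them pinned in %r8d-%r15d and rotates the role assignment from round to round instead, which is why consecutive rnd_0/rnd_1 bodies use the same registers in a rotated order.
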
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 44(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 48(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 52(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 56(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
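
The vector halves of the same blocks compute the SHA-256 message expansion four words at a time: W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]. AVX1 has no packed 32-bit rotate, so every ROTR becomes a vpsrld/vpslld/vpor triple; sigma1's rotates by 17 and 19 are instead done as vpsrlq $0x11/$19 on doubled-up lanes (vpshufd $0xfa and $0x50), with the shuf_00BA and shuf_DC00 masks merging the two halves back. A scalar reference of the recurrence (sha256_expand is an illustrative name; the real code expands in place in %xmm0-%xmm3 and keeps only 16 live words):

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));
}

/* SHA-256 message schedule, scalar reference of the vpalignr/vpsrld/
 * vpslld/vpshufb sequences above. */
static void sha256_expand(uint32_t W[64])
{
    for (int t = 16; t < 64; t++) {
        uint32_t s0 = rotr32(W[t - 15], 7) ^ rotr32(W[t - 15], 18)
                    ^ (W[t - 15] >> 3);              /* sigma0 */
        uint32_t s1 = rotr32(W[t - 2], 17) ^ rotr32(W[t - 2], 19)
                    ^ (W[t - 2] >> 10);              /* sigma1 */
        W[t] = W[t - 16] + s0 + W[t - 7] + s1;
    }
}

The vpalignr $4 pairs at the top of each msg_sched block are what produce the W[t-15] and W[t-7] word streams from the four live xmm registers.
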
+ # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, 
%edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, 
%ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # 
rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm0, %xmm1, %xmm5 + vpalignr $4, %xmm2, %xmm3, %xmm4 
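
Each set_w_k_xfer_4 block above folds the round constants in ahead of time: it adds four K words to each of the four schedule registers and parks the sums at (%rsp) through 48(%rsp), so every scalar round pays a single addl from memory for W[t]+K[t]. A minimal sketch of the same idea with SSE2 intrinsics (w_plus_k_xfer and K256 are illustrative names standing in for the generated code and the L_avx1_sha256_k table):

#include <stdint.h>
#include <emmintrin.h>

/* Precompute W[t]+K[t] for 16 message words, as set_w_k_xfer_4 does.
 * X[0..3] hold the current 16 schedule words, t is the round base
 * (0, 16, 32 or 48), and wk_out plays the role of the stack slots. */
static void w_plus_k_xfer(const __m128i X[4], const uint32_t K256[64],
                          int t, uint32_t wk_out[16])
{
    for (int i = 0; i < 4; i++) {
        __m128i k  = _mm_loadu_si128((const __m128i *)&K256[t + 4 * i]);
        __m128i wk = _mm_add_epi32(X[i], k);
        _mm_storeu_si128((__m128i *)&wk_out[4 * i], wk);
    }
}

Pre-adding K is also why the constant table sits flat in .data: each vpaddd can address a 16-byte slice of it directly, e.g. 64+L_avx1_sha256_k(%rip).
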
+ # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm1, %xmm2, %xmm5 + vpalignr $4, %xmm3, %xmm0, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 16(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 
6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 20(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 24(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 28(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm2, %xmm3, %xmm5 + vpalignr $4, %xmm0, %xmm1, %xmm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 32(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 36(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor 
%xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 40(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 44(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %xmm3, %xmm0, %xmm5 + vpalignr $4, %xmm1, %xmm2, %xmm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 48(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %xmm5, %xmm8 + vpslld $14, %xmm5, %xmm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %xmm6, %xmm7, %xmm6 + vpor %xmm8, %xmm9, %xmm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 52(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %xmm5, %xmm9 + vpxor %xmm6, %xmm8, %xmm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %xmm6, %xmm9, %xmm5 + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %xmm6, %xmm8 + vpsrlq $19, 
%xmm6, %xmm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 56(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %xmm6, %xmm7, %xmm6 + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 60(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %xmm6, %xmm8 + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %xmm6, %xmm9 + vpxor %xmm8, %xmm7, %xmm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %xmm9, %xmm8, %xmm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # rnd_all_4: 0-3 + addl (%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 8(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 
12(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 1-4 + addl 16(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 24(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 28(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 2-5 + addl 32(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 40(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, 
%r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 3-6 + addl 48(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 56(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + addq $0x40, %rbp + subl $0x40, %esi + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + jnz L_sha256_len_avx1_start + xorq %rax, %rax + vzeroupper + addq $0x40, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_k: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 
0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_shuf_00BA: +.quad 0xb0a090803020100, 0xffffffffffffffff +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_shuf_DC00: +.quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_avx1_rorx_sha256_flip_mask: +.quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX1_RORX +.type Transform_Sha256_AVX1_RORX,@function +.align 4 +Transform_Sha256_AVX1_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_RORX +.p2align 2 +_Transform_Sha256_AVX1_RORX: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x40, %rsp + vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11 + vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12 + leaq 32(%rdi), %rax + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + movl %r9d, %ebx + rorxl $6, %r12d, %edx + xorl %r10d, %ebx + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl 
%ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, 
%ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + 
vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl 
%r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + 
andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, 
%ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, 
%edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, 
%xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl 
%ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # 
rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, 
%edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, 
%xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + xorl %eax, %eax + # rnd_all_4: 0-3 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl (%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 8(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 12(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + # rnd_all_4: 1-4 + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + addl %eax, %r12d + addl 16(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, 
%ebx + addl %r10d, %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + addl %eax, %r10d + addl 24(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 28(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + # rnd_all_4: 2-5 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl 32(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 40(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + # rnd_all_4: 3-6 + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + addl %eax, %r12d + addl 48(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx 
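+ # rnd_all_4 note: these final rounds consume the W[t]+K[t] values staged on the stack by set_w_k_xfer_4; Sigma1(e) = ROTR6 ^ ROTR11 ^ ROTR25 and Sigma0(a) = ROTR2 ^ ROTR13 ^ ROTR22 are each assembled from three rorx results xored together (rorx does not modify EFLAGS)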
+ rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + addl %eax, %r10d + addl 56(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + addl %eax, %r8d + addl %r8d, (%rdi) + addl %r9d, 4(%rdi) + addl %r10d, 8(%rdi) + addl %r11d, 12(%rdi) + addl %r12d, 16(%rdi) + addl %r13d, 20(%rdi) + addl %r14d, 24(%rdi) + addl %r15d, 28(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x40, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX1_RORX_Len +.type Transform_Sha256_AVX1_RORX_Len,@function +.align 4 +Transform_Sha256_AVX1_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_RORX_Len +.p2align 2 +_Transform_Sha256_AVX1_RORX_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + movq %rdx, %rsi + subq $0x40, %rsp + vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11 + vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # Start of loop processing a block +L_sha256_len_avx1_len_rorx_start: + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rbp), %xmm0 + vmovdqu 16(%rbp), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vmovdqu 32(%rbp), %xmm2 + vmovdqu 48(%rbp), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + movl %r9d, %ebx + rorxl $6, %r12d, %edx + xorl %r10d, %ebx + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl 
%r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, 
%r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, 
%r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, 
%ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 4 + vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, 
%r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # 
rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl 
%ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, 
%ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 8 + vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %xmm2, %xmm3, %xmm4 + vpalignr $4, %xmm0, %xmm1, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm3, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 
8(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm0, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm0 + # msg_sched done: 0-3 + # msg_sched: 4-7 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 16(%rsp), %r11d + vpalignr $4, %xmm3, %xmm0, %xmm4 + vpalignr $4, %xmm1, %xmm2, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 20(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm0, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 24(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm1, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + 
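+ # Message schedule (FIPS 180-4): W[t] = sigma_1(W[t-2]) + W[t-7] + sigma_0(W[t-15]) + W[t-16].
+ # At this point xmm4 holds the running sum W[t-16] + W[t-7], sigma_0(W[t-15]) is in xmm5, and
+ # the vpsrlq $0x11/$19 shifts above emulate the ROTR17/ROTR19 parts of sigma_1 in 64-bit lanes.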
vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 28(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm1 + # msg_sched done: 4-7 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 32(%rsp), %r15d + vpalignr $4, %xmm0, %xmm1, %xmm4 + vpalignr $4, %xmm2, %xmm3, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 36(%rsp), %r14d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpshufd $0xfa, %xmm1, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 40(%rsp), %r13d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm2, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl 
%ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 44(%rsp), %r12d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vpaddd %xmm4, %xmm9, %xmm2 + # msg_sched done: 8-11 + # msg_sched: 12-15 + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 48(%rsp), %r11d + vpalignr $4, %xmm1, %xmm2, %xmm4 + vpalignr $4, %xmm3, %xmm0, %xmm5 + # rnd_0: 1 - 2 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %xmm5, %xmm6 + vpslld $25, %xmm5, %xmm7 + # rnd_0: 3 - 4 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $3, %xmm5, %xmm8 + vpor %xmm6, %xmm7, %xmm7 + # rnd_0: 5 - 7 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 52(%rsp), %r10d + vpsrld $18, %xmm5, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpslld $14, %xmm5, %xmm5 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpxor %xmm5, %xmm7, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %xmm6, %xmm7, %xmm7 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpshufd $0xfa, %xmm2, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + vpxor %xmm8, %xmm7, %xmm5 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrld $10, %xmm6, %xmm8 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 56(%rsp), %r9d + vpsrlq $19, %xmm6, %xmm7 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpsrlq $0x11, %xmm6, %xmm6 + vpaddd %xmm3, %xmm4, %xmm4 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %xmm5, %xmm4, %xmm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpxor %xmm7, %xmm6, %xmm6 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpxor %xmm6, %xmm8, %xmm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufb %xmm11, %xmm8, %xmm8 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpaddd %xmm8, %xmm4, %xmm4 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + 
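+ # Each rnd_0/rnd_1 pair is one SHA-256 round: T1 = h + Sigma_1(e) + Ch(e,f,g) + (W[t] + K[t]),
+ # T2 = Sigma_0(a) + Maj(a,b,c). rorxl $6/$11/$25 of e builds Sigma_1 and rorxl $2/$13/$22 of
+ # the new a builds Sigma_0; vector message-schedule ops are interleaved to fill scalar ALU gaps.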
# rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 60(%rsp), %r8d + vpshufd $0x50, %xmm4, %xmm6 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpsrld $10, %xmm6, %xmm9 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpsrlq $19, %xmm6, %xmm7 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpsrlq $0x11, %xmm6, %xmm6 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpxor %xmm7, %xmm6, %xmm6 + # rnd_1: 5 - 5 + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + vpxor %xmm6, %xmm9, %xmm9 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + vpshufb %xmm12, %xmm9, %xmm9 + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vpaddd %xmm4, %xmm9, %xmm3 + # msg_sched done: 12-15 + # set_w_k_xfer_4: 12 + vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4 + vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5 + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm5, 16(%rsp) + vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6 + vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7 + vmovdqu %xmm6, 32(%rsp) + vmovdqu %xmm7, 48(%rsp) + xorl %eax, %eax + xorl %ecx, %ecx + # rnd_all_4: 0-3 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl (%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 4(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 8(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 12(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + # rnd_all_4: 1-4 + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + addl %eax, %r12d + addl 16(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, 
%edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 20(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + addl %r10d, %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + addl %eax, %r10d + addl 24(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 28(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + addl %r8d, %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + # rnd_all_4: 2-5 + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + addl %eax, %r8d + addl 32(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 36(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + addl %r14d, %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + addl %eax, %r14d + addl 40(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 44(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + addl %r12d, %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + 
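+ # Maj(a,b,c) is computed as ((a ^ b) & (b ^ c)) ^ b; because the working registers rotate each
+ # round, eax and ebx alternate so the previous round's a ^ b is reused as this round's b ^ c.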
xorl %r14d, %eax
+ # rnd_all_4: 3-6
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ addl %eax, %r12d
+ addl 48(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 52(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ addl %eax, %r10d
+ addl 56(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 60(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ addl (%rdi), %r8d
+ addl 4(%rdi), %r9d
+ addl 8(%rdi), %r10d
+ addl 12(%rdi), %r11d
+ addl 16(%rdi), %r12d
+ addl 20(%rdi), %r13d
+ addl 24(%rdi), %r14d
+ addl 28(%rdi), %r15d
+ addq $0x40, %rbp
+ subl $0x40, %esi
+ movl %r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ jnz L_sha256_len_avx1_len_rorx_start
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x40, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX1 */
+#ifdef HAVE_INTEL_AVX2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+L_avx2_sha256_k:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_avx2_sha256_shuf_00BA:
+.quad 0xb0a090803020100, 0xffffffffffffffff
+.quad 0xb0a090803020100, 0xffffffffffffffff
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_avx2_sha256_shuf_DC00:
+.quad 0xffffffffffffffff, 0xb0a090803020100
+.quad 0xffffffffffffffff, 0xb0a090803020100
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_avx2_sha256_flip_mask:
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX2
+.type Transform_Sha256_AVX2,@function
+.align 4
+Transform_Sha256_AVX2:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX2
+.p2align 2
+_Transform_Sha256_AVX2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x200, %rsp
+ leaq 32(%rdi), %rax
+ vmovdqa L_avx2_sha256_flip_mask(%rip), %xmm13
+ vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11
+ vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vpshufb %xmm13, %xmm0, %xmm0
+ vpshufb %xmm13, %xmm1, %xmm1
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vpshufb %xmm13, %xmm2, %xmm2
+ vpshufb %xmm13, %xmm3, %xmm3
+ movl %r9d, %ebx
+ movl %r12d, %edx
+ xorl %r10d, %ebx
+ # set_w_k_xfer_4: 0
+ vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, (%rsp)
+ vmovdqu %ymm5, 32(%rsp)
+ vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 64(%rsp)
+ vmovdqu %ymm5, 96(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ #
rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 0-3 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 32(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 36(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, 
%ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 40(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 44(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 8-11 + # msg_sched: 16-19 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 64(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 68(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + 
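+ # sigma_1 = ROTR17 ^ ROTR19 ^ SHR10 on the top two schedule words: vpshufd $0xfa spread
+ # W[t-2] and W[t-1] across the 64-bit lanes of ymm6 so vpsrlq $0x11/$19 can stand in for the
+ # 32-bit rotates; vpshufb with shuf_00BA/shuf_DC00 later packs the results back into dword slots.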
vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 72(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 76(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 16-19 + # msg_sched: 24-27 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 96(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 100(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 104(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl 
$5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 108(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 24-27 + # set_w_k_xfer_4: 4 + vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 128(%rsp) + vmovdqu %ymm5, 160(%rsp) + vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 192(%rsp) + vmovdqu %ymm5, 224(%rsp) + # msg_sched: 32-35 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 128(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 132(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 136(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, 
%ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 140(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 32-35 + # msg_sched: 40-43 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 160(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 164(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 168(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 
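+ # This non-RORX AVX2 path folds the rotates: Sigma_1(e) = ((ROTR14(e)^e) ROTR5 ^ e) ROTR6
+ # = ROTR6^ROTR11^ROTR25, and Sigma_0(a) = ((ROTR9(a)^a) ROTR11 ^ a) ROTR2 = ROTR2^ROTR13^ROTR22.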
+ rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 172(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 40-43 + # msg_sched: 48-51 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 192(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 196(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 200(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 204(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, 
%ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 48-51 + # msg_sched: 56-59 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 224(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 228(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 232(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 236(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl 
%ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 56-59 + # set_w_k_xfer_4: 8 + vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 256(%rsp) + vmovdqu %ymm5, 288(%rsp) + vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 320(%rsp) + vmovdqu %ymm5, 352(%rsp) + # msg_sched: 64-67 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 256(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 260(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 264(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 268(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 64-67 + # 
msg_sched: 72-75 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 288(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 292(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 296(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 300(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 72-75 + # msg_sched: 80-83 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 320(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx 
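+ # W + K rows are staged in the 0x200-byte frame at a 32-byte stride; this one-block transform
+ # reads only the low 128-bit lane of each row (offsets 0,4,8,12 within the row), matching the
+ # doubled rows of L_avx2_sha256_k that keep the constants identical in both lanes.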
+ xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 324(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 328(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 332(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 80-83 + # msg_sched: 88-91 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 352(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # 
rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 356(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 360(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 364(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 88-91 + # set_w_k_xfer_4: 12 + vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 384(%rsp) + vmovdqu %ymm5, 416(%rsp) + vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 448(%rsp) + vmovdqu %ymm5, 480(%rsp) + # rnd_all_4: 24-27 + addl 384(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + 
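+ # Note: the ymm arithmetic woven through these rounds is the message
+ # schedule W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16], computed
+ # four words at a time and for two blocks at once (one per 128-bit
+ # lane). A scalar C sketch of the same step (illustrative names only):
+ #   s0 = ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);    /* x = W[t-15] */
+ #   s1 = ror32(y, 17) ^ ror32(y, 19) ^ (y >> 10);  /* y = W[t-2]  */
+ #   W[t] = W[t-16] + s0 + W[t-7] + s1;
+ # AVX2 has no 32-bit vector rotate, so s0's rotates are built from
+ # shift pairs plus vpor, while for s1 vpshufd duplicates each word
+ # across a 64-bit lane so that vpsrlq $0x11/$19 behaves as a 32-bit
+ # rotate; the shuf_00BA/shuf_DC00 masks then re-pack the half-results.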
addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 392(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 26-29 + addl 416(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 420(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 424(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 28-31 + addl 448(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + 
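+ # Note: the closing "rnd_all_4" groups are the last 16 rounds, which
+ # need no further message scheduling and so run back to back against
+ # the K[t]+W[t] values already staged from 384(%rsp) upward. The
+ # 0x200-byte frame holds all 64 rounds' worth for both interleaved
+ # blocks: 64 rounds x 2 blocks x 4 bytes = 512 bytes.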
andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 456(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 460(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 30-33 + addl 480(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 484(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 488(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 492(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl %r8d, (%rdi) + addl %r9d, 4(%rdi) + addl %r10d, 8(%rdi) + addl %r11d, 12(%rdi) + addl %r12d, 16(%rdi) + addl %r13d, 20(%rdi) + addl %r14d, 24(%rdi) + addl %r15d, 28(%rdi) + xorq %rax, %rax + vzeroupper + addq $0x200, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq 
%rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX2_Len +.type Transform_Sha256_AVX2_Len,@function +.align 4 +Transform_Sha256_AVX2_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_Len +.p2align 2 +_Transform_Sha256_AVX2_Len: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rbp + movq %rsi, %rbp + movq %rdx, %rsi + subq $0x200, %rsp + testb $0x40, %sil + je L_sha256_len_avx2_block + vmovdqu (%rbp), %ymm0 + vmovdqu 32(%rbp), %ymm1 + vmovups %ymm0, 32(%rdi) + vmovups %ymm1, 64(%rdi) +#ifndef __APPLE__ + call Transform_Sha256_AVX2@plt +#else + call _Transform_Sha256_AVX2 +#endif /* __APPLE__ */ + addq $0x40, %rbp + subl $0x40, %esi + jz L_sha256_len_avx2_done +L_sha256_len_avx2_block: + vmovdqa L_avx2_sha256_flip_mask(%rip), %ymm13 + vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11 + vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12 + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + # Start of loop processing two blocks +L_sha256_len_avx2_start: + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rbp), %xmm0 + vmovdqu 16(%rbp), %xmm1 + vmovdqu 64(%rbp), %xmm4 + vmovdqu 80(%rbp), %xmm5 + vinserti128 $0x01, %xmm4, %ymm0, %ymm0 + vinserti128 $0x01, %xmm5, %ymm1, %ymm1 + vpshufb %ymm13, %ymm0, %ymm0 + vpshufb %ymm13, %ymm1, %ymm1 + vmovdqu 32(%rbp), %xmm2 + vmovdqu 48(%rbp), %xmm3 + vmovdqu 96(%rbp), %xmm6 + vmovdqu 112(%rbp), %xmm7 + vinserti128 $0x01, %xmm6, %ymm2, %ymm2 + vinserti128 $0x01, %xmm7, %ymm3, %ymm3 + vpshufb %ymm13, %ymm2, %ymm2 + vpshufb %ymm13, %ymm3, %ymm3 + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # set_w_k_xfer_4: 0 + vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, (%rsp) + vmovdqu %ymm5, 32(%rsp) + vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 64(%rsp) + vmovdqu %ymm5, 96(%rsp) + # msg_sched: 0-3 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl (%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 4(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + 
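+ # Note: Transform_Sha256_AVX2_Len consumes two 64-byte blocks per loop
+ # iteration. The testb $0x40 check above routes an odd leading block
+ # through the one-block Transform_Sha256_AVX2 first, so the main loop
+ # always sees an even block count. vinserti128 packs block n into the
+ # low 128-bit lane and block n+1 into the high lane of each ymm, and
+ # vpshufb with flip_mask byte-swaps the big-endian message words;
+ # roughly, as a C sketch (illustrative names, not from the source):
+ #   W[i]      = be32toh(load32(data      + 4*i));  /* low lane  */
+ #   W[i + 16] = be32toh(load32(data + 64 + 4*i));  /* high lane */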
rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 8(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 12(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 0-3 + # msg_sched: 8-11 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 32(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 36(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 40(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl 
%ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 44(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 8-11 + # msg_sched: 16-19 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 64(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 68(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 72(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, 
%ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 76(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 16-19 + # msg_sched: 24-27 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 96(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 100(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 104(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 108(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + 
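+ # Note: each set_w_k_xfer_4 block stages the next 16 rounds' worth of
+ # K[t]+W[t] sums for both interleaved blocks on the stack, i.e.
+ # roughly (C sketch, illustrative names):
+ #   for (i = 0; i < 16; i++) wk[t + i] = W[t + i] + K256[t + i];
+ # so each round pays a single addl N(%rsp) for its
+ # constant-plus-schedule-word term.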
addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 24-27 + # set_w_k_xfer_4: 4 + vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 128(%rsp) + vmovdqu %ymm5, 160(%rsp) + vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 192(%rsp) + vmovdqu %ymm5, 224(%rsp) + # msg_sched: 32-35 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 128(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 132(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 136(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 140(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 
- 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 32-35 + # msg_sched: 40-43 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 160(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 164(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 168(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 172(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched 
done: 40-43 + # msg_sched: 48-51 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 192(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 196(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 200(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 204(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 48-51 + # msg_sched: 56-59 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 224(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl 
%ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 228(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 232(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 236(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 56-59 + # set_w_k_xfer_4: 8 + vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 256(%rsp) + vmovdqu %ymm5, 288(%rsp) + vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 320(%rsp) + vmovdqu %ymm5, 352(%rsp) + # msg_sched: 64-67 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm0, %ymm1, %ymm5 + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 256(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + 
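+ # Note: ymm0-ymm3 act as a ring buffer over the last 16 schedule
+ # words. Each msg_sched block consumes vpalignr-extracted views of
+ # W[t-16..t-1] and writes four fresh words into the oldest register
+ # (the final vpaddd into ymm0, ymm1, ymm2 or ymm3), which is why the
+ # register roles rotate from one msg_sched block to the next.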
vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 260(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm3, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 264(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 268(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # msg_sched done: 64-67 + # msg_sched: 72-75 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm1, %ymm2, %ymm5 + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 288(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl 
%r8d, %ecx + addl 292(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm0, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 296(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 300(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # msg_sched done: 72-75 + # msg_sched: 80-83 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm2, %ymm3, %ymm5 + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 1 - 2 + movl %r9d, %eax + movl %r13d, %ecx + addl 320(%rsp), %r15d + xorl %r14d, %ecx + xorl %r12d, %edx + andl %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r14d, %ecx + xorl %r12d, %edx + addl %ecx, %r15d + rorl $6, %edx + xorl %r8d, %eax + addl %edx, %r15d + movl %r8d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r8d, %ebx + movl %r12d, %ecx + addl 324(%rsp), %r14d + xorl %r13d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r11d, %edx + andl %r11d, %ecx + rorl $5, %edx + xorl %r13d, %ecx + xorl %r11d, %edx + addl %ecx, %r14d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm1, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r15d, %ebx + addl %edx, %r14d + movl %r15d, %ecx + andl %ebx, %eax + rorl $9, 
%ecx + xorl %r15d, %ecx + xorl %r8d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r15d, %eax + movl %r11d, %ecx + addl 328(%rsp), %r13d + xorl %r12d, %ecx + xorl %r10d, %edx + andl %r10d, %ecx + rorl $5, %edx + xorl %r12d, %ecx + xorl %r10d, %edx + addl %ecx, %r13d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r14d, %eax + addl %edx, %r13d + movl %r14d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r14d, %ecx + xorl %r15d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r14d, %ebx + movl %r10d, %ecx + addl 332(%rsp), %r12d + xorl %r11d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r9d, %edx + andl %r9d, %ecx + rorl $5, %edx + xorl %r11d, %ecx + xorl %r9d, %edx + addl %ecx, %r12d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r13d, %ebx + addl %edx, %r12d + movl %r13d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r13d, %ecx + xorl %r14d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # msg_sched done: 80-83 + # msg_sched: 88-91 + # rnd_0: 0 - 0 + rorl $14, %edx + vpalignr $4, %ymm3, %ymm0, %ymm5 + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 1 - 2 + movl %r13d, %eax + movl %r9d, %ecx + addl 352(%rsp), %r11d + xorl %r10d, %ecx + xorl %r8d, %edx + andl %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + vpslld $25, %ymm5, %ymm7 + # rnd_0: 3 - 4 + rorl $5, %edx + xorl %r10d, %ecx + xorl %r8d, %edx + addl %ecx, %r11d + rorl $6, %edx + xorl %r12d, %eax + addl %edx, %r11d + movl %r12d, %ecx + vpsrld $18, %ymm5, %ymm8 + vpslld $14, %ymm5, %ymm9 + # rnd_0: 5 - 6 + andl %eax, %ebx + rorl $9, %ecx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + vpor %ymm6, %ymm7, %ymm6 + vpor %ymm8, %ymm9, %ymm8 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + # rnd_1: 0 - 1 + rorl $14, %edx + movl %r12d, %ebx + movl %r8d, %ecx + addl 356(%rsp), %r10d + xorl %r9d, %ecx + vpsrld $3, %ymm5, %ymm9 + vpxor %ymm6, %ymm8, %ymm6 + # rnd_1: 2 - 3 + xorl %r15d, %edx + andl %r15d, %ecx + rorl $5, %edx + xorl %r9d, %ecx + xorl %r15d, %edx + addl %ecx, %r10d + vpxor %ymm6, %ymm9, %ymm5 + vpshufd $0xfa, %ymm2, %ymm6 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r11d, %ebx + addl %edx, %r10d + movl %r11d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r11d, %ecx + xorl %r12d, %eax + vpsrld $10, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 6 - 7 + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + # rnd_0: 0 - 0 + rorl $14, %edx + vpsrlq $0x11, %ymm6, %ymm6 + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 1 - 3 + movl %r11d, %eax + movl %r15d, %ecx + addl 
360(%rsp), %r9d + xorl %r8d, %ecx + xorl %r14d, %edx + andl %r14d, %ecx + rorl $5, %edx + xorl %r8d, %ecx + xorl %r14d, %edx + addl %ecx, %r9d + vpxor %ymm6, %ymm7, %ymm6 + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 4 - 4 + rorl $6, %edx + xorl %r10d, %eax + addl %edx, %r9d + movl %r10d, %ecx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 5 - 5 + andl %eax, %ebx + rorl $9, %ecx + xorl %r10d, %ecx + xorl %r11d, %ebx + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 6 - 6 + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 7 - 7 + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + # rnd_1: 0 - 0 + rorl $14, %edx + vpshufd $0x50, %ymm4, %ymm6 + # rnd_1: 1 - 1 + movl %r10d, %ebx + movl %r14d, %ecx + addl 364(%rsp), %r8d + xorl %r15d, %ecx + vpsrlq $0x11, %ymm6, %ymm8 + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 2 - 3 + xorl %r13d, %edx + andl %r13d, %ecx + rorl $5, %edx + xorl %r15d, %ecx + xorl %r13d, %edx + addl %ecx, %r8d + vpsrld $10, %ymm6, %ymm9 + vpxor %ymm8, %ymm7, %ymm8 + # rnd_1: 4 - 5 + rorl $6, %edx + xorl %r9d, %ebx + addl %edx, %r8d + movl %r9d, %ecx + andl %ebx, %eax + rorl $9, %ecx + xorl %r9d, %ecx + xorl %r10d, %eax + vpxor %ymm9, %ymm8, %ymm9 + # rnd_1: 6 - 6 + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 7 - 7 + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # msg_sched done: 88-91 + # set_w_k_xfer_4: 12 + vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, 384(%rsp) + vmovdqu %ymm5, 416(%rsp) + vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 448(%rsp) + vmovdqu %ymm5, 480(%rsp) + # rnd_all_4: 24-27 + addl 384(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 392(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, 
%eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 26-29 + addl 416(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 420(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 424(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 28-31 + addl 448(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 456(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, 
%r13d + addl 460(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 30-33 + addl 480(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 484(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 488(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 492(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + movl %r9d, %ebx + movl %r12d, %edx + xorl %r10d, %ebx + # rnd_all_4: 1-4 + addl 16(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 20(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl 
$6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 24(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 28(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 3-6 + addl 48(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 56(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 5-8 + addl 80(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl 
%ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 84(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 88(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 92(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 7-10 + addl 112(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 116(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 120(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 124(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 9-12 + addl 144(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl 
%r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 148(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 152(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 156(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 11-14 + addl 176(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 180(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 184(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 188(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl 
$9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 13-16 + addl 208(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 212(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 216(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 220(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 15-18 + addl 240(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 244(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 248(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl 
%r13d, %edx + addl %ecx, %r9d + addl 252(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 17-20 + addl 272(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 276(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 280(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 284(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 19-22 + addl 304(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 308(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 312(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx 
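+ # NOTE (annotation, not generated output; register mapping a..h =
+ # r8d..r15d is assumed from the movl (%rdi) loads): each rnd_all_4
+ # block here is four scalar SHA-256 rounds. Per round:
+ #   T1 = h + Sigma1(e) + Ch(e,f,g) + (K[t] + W[t])
+ #   T2 = Sigma0(a) + Maj(a,b,c)
+ # K[t] + W[t] is read back from the stack where set_w_k_xfer_4 stored
+ # it; Sigma1(e) is the rorl $14 / $5 / $6 chain in edx, Sigma0(a) the
+ # rorl $9 / $11 / $2 chain in ecx, and Maj(a,b,c) reuses the previous
+ # round's b^c kept alternating in eax/ebx. Successive rounds rotate
+ # the register roles instead of moving state between registers.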
+ xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 316(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 21-24 + addl 336(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 340(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 344(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 348(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 23-26 + addl 368(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 372(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl 
%ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 376(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 380(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 25-28 + addl 400(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 404(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 408(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 412(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 27-30 + addl 432(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl 
%ecx, %r11d + addl 436(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 440(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 444(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + # rnd_all_4: 29-32 + addl 464(%rsp), %r15d + movl %r13d, %ecx + movl %r9d, %eax + xorl %r14d, %ecx + rorl $14, %edx + andl %r12d, %ecx + xorl %r12d, %edx + xorl %r14d, %ecx + rorl $5, %edx + addl %ecx, %r15d + xorl %r12d, %edx + xorl %r8d, %eax + rorl $6, %edx + movl %r8d, %ecx + addl %edx, %r15d + rorl $9, %ecx + andl %eax, %ebx + xorl %r8d, %ecx + xorl %r9d, %ebx + rorl $11, %ecx + addl %r15d, %r11d + xorl %r8d, %ecx + addl %ebx, %r15d + rorl $2, %ecx + movl %r11d, %edx + addl %ecx, %r15d + addl 468(%rsp), %r14d + movl %r12d, %ecx + movl %r8d, %ebx + xorl %r13d, %ecx + rorl $14, %edx + andl %r11d, %ecx + xorl %r11d, %edx + xorl %r13d, %ecx + rorl $5, %edx + addl %ecx, %r14d + xorl %r11d, %edx + xorl %r15d, %ebx + rorl $6, %edx + movl %r15d, %ecx + addl %edx, %r14d + rorl $9, %ecx + andl %ebx, %eax + xorl %r15d, %ecx + xorl %r8d, %eax + rorl $11, %ecx + addl %r14d, %r10d + xorl %r15d, %ecx + addl %eax, %r14d + rorl $2, %ecx + movl %r10d, %edx + addl %ecx, %r14d + addl 472(%rsp), %r13d + movl %r11d, %ecx + movl %r15d, %eax + xorl %r12d, %ecx + rorl $14, %edx + andl %r10d, %ecx + xorl %r10d, %edx + xorl %r12d, %ecx + rorl $5, %edx + addl %ecx, %r13d + xorl %r10d, %edx + xorl %r14d, %eax + rorl $6, %edx + movl %r14d, %ecx + addl %edx, %r13d + rorl $9, %ecx + andl %eax, %ebx + xorl %r14d, %ecx + xorl %r15d, %ebx + rorl $11, %ecx + addl %r13d, %r9d + xorl %r14d, %ecx + addl %ebx, %r13d + rorl $2, %ecx + movl %r9d, %edx + addl %ecx, %r13d + addl 476(%rsp), %r12d + movl %r10d, %ecx + movl %r14d, %ebx + xorl %r11d, %ecx + rorl $14, %edx + andl %r9d, %ecx + xorl %r9d, %edx + xorl %r11d, %ecx + rorl $5, %edx + addl %ecx, %r12d + xorl %r9d, %edx + xorl %r13d, %ebx + rorl $6, %edx + movl %r13d, %ecx + addl %edx, %r12d + rorl $9, %ecx + andl %ebx, %eax + xorl %r13d, %ecx + xorl %r14d, %eax + rorl $11, %ecx + addl %r12d, %r8d + xorl %r13d, %ecx + addl %eax, %r12d + rorl $2, %ecx + movl %r8d, %edx + addl %ecx, %r12d + # rnd_all_4: 31-34 + addl 496(%rsp), %r11d + movl %r9d, %ecx + movl %r13d, %eax + xorl %r10d, %ecx + rorl $14, %edx + andl %r8d, %ecx + xorl %r8d, %edx + xorl %r10d, %ecx + 
rorl $5, %edx + addl %ecx, %r11d + xorl %r8d, %edx + xorl %r12d, %eax + rorl $6, %edx + movl %r12d, %ecx + addl %edx, %r11d + rorl $9, %ecx + andl %eax, %ebx + xorl %r12d, %ecx + xorl %r13d, %ebx + rorl $11, %ecx + addl %r11d, %r15d + xorl %r12d, %ecx + addl %ebx, %r11d + rorl $2, %ecx + movl %r15d, %edx + addl %ecx, %r11d + addl 500(%rsp), %r10d + movl %r8d, %ecx + movl %r12d, %ebx + xorl %r9d, %ecx + rorl $14, %edx + andl %r15d, %ecx + xorl %r15d, %edx + xorl %r9d, %ecx + rorl $5, %edx + addl %ecx, %r10d + xorl %r15d, %edx + xorl %r11d, %ebx + rorl $6, %edx + movl %r11d, %ecx + addl %edx, %r10d + rorl $9, %ecx + andl %ebx, %eax + xorl %r11d, %ecx + xorl %r12d, %eax + rorl $11, %ecx + addl %r10d, %r14d + xorl %r11d, %ecx + addl %eax, %r10d + rorl $2, %ecx + movl %r14d, %edx + addl %ecx, %r10d + addl 504(%rsp), %r9d + movl %r15d, %ecx + movl %r11d, %eax + xorl %r8d, %ecx + rorl $14, %edx + andl %r14d, %ecx + xorl %r14d, %edx + xorl %r8d, %ecx + rorl $5, %edx + addl %ecx, %r9d + xorl %r14d, %edx + xorl %r10d, %eax + rorl $6, %edx + movl %r10d, %ecx + addl %edx, %r9d + rorl $9, %ecx + andl %eax, %ebx + xorl %r10d, %ecx + xorl %r11d, %ebx + rorl $11, %ecx + addl %r9d, %r13d + xorl %r10d, %ecx + addl %ebx, %r9d + rorl $2, %ecx + movl %r13d, %edx + addl %ecx, %r9d + addl 508(%rsp), %r8d + movl %r14d, %ecx + movl %r10d, %ebx + xorl %r15d, %ecx + rorl $14, %edx + andl %r13d, %ecx + xorl %r13d, %edx + xorl %r15d, %ecx + rorl $5, %edx + addl %ecx, %r8d + xorl %r13d, %edx + xorl %r9d, %ebx + rorl $6, %edx + movl %r9d, %ecx + addl %edx, %r8d + rorl $9, %ecx + andl %ebx, %eax + xorl %r9d, %ecx + xorl %r10d, %eax + rorl $11, %ecx + addl %r8d, %r12d + xorl %r9d, %ecx + addl %eax, %r8d + rorl $2, %ecx + movl %r12d, %edx + addl %ecx, %r8d + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + addq $0x80, %rbp + subl $0x80, %esi + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + jnz L_sha256_len_avx2_start +L_sha256_len_avx2_done: + xorq %rax, %rax + vzeroupper + addq $0x200, %rsp + popq %rbp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_k: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 
0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_flip_mask: +.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_shuf_00BA: +.quad 0xb0a090803020100, 0xffffffffffffffff +.quad 0xb0a090803020100, 0xffffffffffffffff +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ +L_avx2_rorx_sha256_shuf_DC00: +.quad 0xffffffffffffffff, 0xb0a090803020100 +.quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.text +.globl Transform_Sha256_AVX2_RORX +.type Transform_Sha256_AVX2_RORX,@function +.align 4 +Transform_Sha256_AVX2_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_RORX +.p2align 2 +_Transform_Sha256_AVX2_RORX: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x200, %rsp + leaq 32(%rdi), %rax + vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %xmm13 + vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11 + vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12 + # X0, X1, X2, X3 = W[0..15] + vmovdqu (%rax), %xmm0 + vmovdqu 16(%rax), %xmm1 + vpshufb %xmm13, %xmm0, %xmm0 + vpshufb %xmm13, %xmm1, %xmm1 + vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, (%rsp) + vmovdqu %ymm5, 32(%rsp) + vmovdqu 32(%rax), %xmm2 + vmovdqu 48(%rax), %xmm3 + vpshufb %xmm13, %xmm2, %xmm2 + vpshufb %xmm13, %xmm3, %xmm3 + vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 64(%rsp) + vmovdqu %ymm5, 96(%rsp) + movl (%rdi), %r8d + movl 4(%rdi), %r9d + movl 8(%rdi), %r10d + movl 12(%rdi), %r11d + movl 16(%rdi), %r12d + movl 20(%rdi), %r13d + movl 24(%rdi), %r14d + movl 28(%rdi), %r15d + movl %r9d, %ebx + rorxl $6, %r12d, %edx + xorl %r10d, %ebx + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor 
%ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 128(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 32(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, 
%ebx + rorxl $11, %r15d, %ecx + addl 36(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 40(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 44(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 160(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 64(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 68(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor 
%ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 72(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 76(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 192(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 96(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 100(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl 
%edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 104(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 108(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 224(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 128(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 132(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d 
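+ # NOTE (annotation, not generated output; the two-blocks-per-register
+ # lane layout is an assumption from the duplicated K table rows): the
+ # ymm work interleaved with these rounds is the message schedule
+ #   W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
+ # computed for two input blocks at once, one per 128-bit lane.
+ # sigma0 = ROTR7 ^ ROTR18 ^ SHR3 comes from the vpsrld $7 / vpslld
+ # $25, vpsrld $18 / vpslld $14 and vpsrld $3 steps; sigma1 = ROTR17 ^
+ # ROTR19 ^ SHR10 from vpsrlq $0x11, vpsrlq $19 and vpsrld $10 on the
+ # vpshufd-selected upper words. The scalar side uses BMI2 rorxl, a
+ # rotate that leaves EFLAGS untouched, so rotates overlap the
+ # flag-setting adds without stalls.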
+ vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 136(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 140(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 256(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 160(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 164(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl 
%r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 168(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 172(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 288(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 192(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 196(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + 
vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 200(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 204(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 320(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 224(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 228(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 232(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 
1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 236(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 352(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 256(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 260(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 264(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl 
$13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 268(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 384(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 288(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 292(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 296(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, 
%ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 300(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 416(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 320(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 324(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 328(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl 
%r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 332(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 448(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 352(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 356(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 360(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, 
%ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 364(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 480(%rsp) + xorl %eax, %eax + xorl %ecx, %ecx + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 384(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 392(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 416(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 420(%rsp), 
%r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 424(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 448(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 456(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 460(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 480(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + 
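+ # rounds 48-63: no message schedule left; w[t]+k[t] is read from the stack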
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 484(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 488(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 492(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ addl %r8d, (%rdi)
+ addl %r9d, 4(%rdi)
+ addl %r10d, 8(%rdi)
+ addl %r11d, 12(%rdi)
+ addl %r12d, 16(%rdi)
+ addl %r13d, 20(%rdi)
+ addl %r14d, 24(%rdi)
+ addl %r15d, 28(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x200, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX2_RORX_Len
+.type Transform_Sha256_AVX2_RORX_Len,@function
+.align 4
+Transform_Sha256_AVX2_RORX_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX2_RORX_Len
+.p2align 2
+_Transform_Sha256_AVX2_RORX_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ movq %rdx, %rsi
+ subq $0x200, %rsp
+ testb $0x40, %sil
+ je L_sha256_len_avx2_rorx_block
+ vmovdqu (%rbp), %ymm0
+ vmovdqu 32(%rbp), %ymm1
+ vmovups %ymm0, 32(%rdi)
+ vmovups %ymm1, 64(%rdi)
+#ifndef __APPLE__
+ call Transform_Sha256_AVX2_RORX@plt
+#else
+ call _Transform_Sha256_AVX2_RORX
+#endif /* __APPLE__ */
+ addq $0x40, %rbp
+ subl $0x40, %esi
+ jz L_sha256_len_avx2_rorx_done
+L_sha256_len_avx2_rorx_block:
+ vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %ymm13
+ vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11
+ vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # Start of loop processing two blocks
+L_sha256_len_avx2_rorx_start:
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rbp), %xmm0
+ vmovdqu 16(%rbp), %xmm1
+ vinserti128 $0x01, 64(%rbp), 
%ymm0, %ymm0 + vinserti128 $0x01, 80(%rbp), %ymm1, %ymm1 + vpshufb %ymm13, %ymm0, %ymm0 + vpshufb %ymm13, %ymm1, %ymm1 + vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5 + vmovdqu %ymm4, (%rsp) + vmovdqu %ymm5, 32(%rsp) + vmovdqu 32(%rbp), %xmm2 + vmovdqu 48(%rbp), %xmm3 + vinserti128 $0x01, 96(%rbp), %ymm2, %ymm2 + vinserti128 $0x01, 112(%rbp), %ymm3, %ymm3 + vpshufb %ymm13, %ymm2, %ymm2 + vpshufb %ymm13, %ymm3, %ymm3 + vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5 + vmovdqu %ymm4, 64(%rsp) + vmovdqu %ymm5, 96(%rsp) + movl %r9d, %ebx + rorxl $6, %r12d, %edx + xorl %r10d, %ebx + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl (%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 4(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 8(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 12(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, 
%ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 128(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 32(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 36(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 40(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 44(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + 
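+ # rorxl 2/13/22 of the new a, xored together, form Sigma0(a)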
rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 160(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 64(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 68(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 72(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 76(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 
192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 192(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 96(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 100(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 104(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 108(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl 
$6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 224(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 128(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 132(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 136(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 140(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 256(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 160(%rsp), %r11d + 
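+ # vpalignr pulls the w[t-15] window into ymm5 and the w[t-7] window into ymm4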
vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 164(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 168(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 172(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 288(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 192(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 
- 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 196(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 200(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 204(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 320(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 224(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + 
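+ # each round: h += Sigma1(e) + Ch(e,f,g) + w[t] + k[t]; d += h; h += Sigma0(a) + Maj(a,b,c)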
xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 228(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 232(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 236(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 352(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 256(%rsp), %r15d + vpalignr $4, %ymm0, %ymm1, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm2, %ymm3, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 
5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 260(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm3, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm0, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 264(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 268(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm0 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 384(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 288(%rsp), %r11d + vpalignr $4, %ymm1, %ymm2, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm3, %ymm0, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl 
%eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 292(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm0, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm1, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 296(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 300(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm1 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 416(%rsp) + # rnd_0: 0 - 0 + movl %r13d, %eax + rorxl $11, %r12d, %ecx + addl 320(%rsp), %r15d + vpalignr $4, %ymm2, %ymm3, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + vpalignr $4, %ymm0, %ymm1, %ymm4 + # rnd_0: 2 - 2 + andl %r12d, %eax + xorl %ecx, %edx + rorxl $13, %r8d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r15d + rorxl $2, %r8d, %edx + xorl %r14d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r8d, %eax + addl %edx, %r15d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + addl %ebx, %r15d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + 
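+ # Message schedule for both interleaved blocks at once (ymm lanes):
+ # sigma0(x) = (x>>>7) ^ (x>>>18) ^ (x>>3), sigma1(x) = (x>>>17) ^ (x>>>19) ^ (x>>10);
+ # one vector step is folded into each scalar round step to hide latency.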
movl %r12d, %ebx + rorxl $11, %r11d, %ecx + addl 324(%rsp), %r14d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r11d, %ebx + xorl %ecx, %edx + rorxl $13, %r15d, %ecx + vpshufd $0xfa, %ymm1, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r14d + rorxl $2, %r15d, %edx + xorl %r13d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r14d, %r10d + movl %r8d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r15d, %ebx + addl %edx, %r14d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r8d, %eax + rorxl $6, %r10d, %edx + addl %eax, %r14d + vpaddd %ymm2, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r11d, %eax + rorxl $11, %r10d, %ecx + addl 328(%rsp), %r13d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r10d, %eax + xorl %ecx, %edx + rorxl $13, %r14d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r13d + rorxl $2, %r14d, %edx + xorl %r12d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r14d, %eax + addl %edx, %r13d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + addl %ebx, %r13d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r10d, %ebx + rorxl $11, %r9d, %ecx + addl 332(%rsp), %r12d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r9d, %ebx + xorl %ecx, %edx + rorxl $13, %r13d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r12d + rorxl $2, %r13d, %edx + xorl %r11d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + vpaddd %ymm4, %ymm9, %ymm2 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r12d, %r8d + movl %r14d, %ebx + vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 + # rnd_1: 6 - 6 + xorl %r13d, %ebx + addl %edx, %r12d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r14d, %eax + rorxl $6, %r8d, %edx + addl %eax, %r12d + vmovdqu %ymm4, 448(%rsp) + # rnd_0: 0 - 0 + movl %r9d, %eax + rorxl $11, %r8d, %ecx + addl 352(%rsp), %r11d + vpalignr $4, %ymm3, %ymm0, %ymm5 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + vpalignr $4, %ymm1, %ymm2, %ymm4 + # rnd_0: 2 - 2 + andl %r8d, %eax + xorl %ecx, %edx + rorxl $13, %r12d, %ecx + vpsrld $7, %ymm5, %ymm6 + # rnd_0: 3 - 3 + addl %edx, %r11d + rorxl $2, %r12d, %edx + xorl %r10d, %eax + vpslld $25, %ymm5, %ymm7 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + vpsrld $18, %ymm5, %ymm8 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + vpslld $14, %ymm5, %ymm9 + # rnd_0: 6 - 6 + xorl %r12d, %eax + addl %edx, %r11d + andl %eax, %ebx + vpor %ymm7, %ymm6, %ymm6 + # rnd_0: 7 - 7 + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + addl %ebx, %r11d + vpor %ymm9, %ymm8, %ymm8 + # rnd_1: 0 - 0 + movl %r8d, %ebx + rorxl $11, %r15d, %ecx + addl 356(%rsp), %r10d + vpsrld $3, %ymm5, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r9d, %ebx + 
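+ # rorxl computes Sigma1(e) = e>>>6 ^ e>>>11 ^ e>>>25 and
+ # Sigma0(a) = a>>>2 ^ a>>>13 ^ a>>>22 without touching the flags,
+ # so the surrounding adds and xors can be scheduled freely.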
rorxl $25, %r15d, %edx + vpxor %ymm8, %ymm6, %ymm6 + # rnd_1: 2 - 2 + andl %r15d, %ebx + xorl %ecx, %edx + rorxl $13, %r11d, %ecx + vpshufd $0xfa, %ymm2, %ymm7 + # rnd_1: 3 - 3 + addl %edx, %r10d + rorxl $2, %r11d, %edx + xorl %r9d, %ebx + vpxor %ymm6, %ymm9, %ymm5 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + vpsrld $10, %ymm7, %ymm8 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r10d, %r14d + movl %r12d, %ebx + vpsrlq $19, %ymm7, %ymm6 + # rnd_1: 6 - 6 + xorl %r11d, %ebx + addl %edx, %r10d + andl %ebx, %eax + vpsrlq $0x11, %ymm7, %ymm7 + # rnd_1: 7 - 7 + xorl %r12d, %eax + rorxl $6, %r14d, %edx + addl %eax, %r10d + vpaddd %ymm3, %ymm4, %ymm4 + # rnd_0: 0 - 0 + movl %r15d, %eax + rorxl $11, %r14d, %ecx + addl 360(%rsp), %r9d + vpxor %ymm7, %ymm6, %ymm6 + # rnd_0: 1 - 1 + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + vpxor %ymm6, %ymm8, %ymm8 + # rnd_0: 2 - 2 + andl %r14d, %eax + xorl %ecx, %edx + rorxl $13, %r10d, %ecx + vpaddd %ymm5, %ymm4, %ymm4 + # rnd_0: 3 - 3 + addl %edx, %r9d + rorxl $2, %r10d, %edx + xorl %r8d, %eax + vpshufb %ymm11, %ymm8, %ymm8 + # rnd_0: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + vpaddd %ymm8, %ymm4, %ymm4 + # rnd_0: 5 - 5 + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + vpshufd $0x50, %ymm4, %ymm6 + # rnd_0: 6 - 6 + xorl %r10d, %eax + addl %edx, %r9d + andl %eax, %ebx + vpsrlq $0x11, %ymm6, %ymm8 + # rnd_0: 7 - 7 + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + addl %ebx, %r9d + vpsrlq $19, %ymm6, %ymm7 + # rnd_1: 0 - 0 + movl %r14d, %ebx + rorxl $11, %r13d, %ecx + addl 364(%rsp), %r8d + vpsrld $10, %ymm6, %ymm9 + # rnd_1: 1 - 1 + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + vpxor %ymm7, %ymm8, %ymm8 + # rnd_1: 2 - 2 + andl %r13d, %ebx + xorl %ecx, %edx + rorxl $13, %r9d, %ecx + vpxor %ymm8, %ymm9, %ymm9 + # rnd_1: 3 - 3 + addl %edx, %r8d + rorxl $2, %r9d, %edx + xorl %r15d, %ebx + vpshufb %ymm12, %ymm9, %ymm9 + # rnd_1: 4 - 4 + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + vpaddd %ymm4, %ymm9, %ymm3 + # rnd_1: 5 - 5 + xorl %ecx, %edx + addl %r8d, %r12d + movl %r10d, %ebx + vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4 + # rnd_1: 6 - 6 + xorl %r9d, %ebx + addl %edx, %r8d + andl %ebx, %eax + # rnd_1: 7 - 7 + xorl %r10d, %eax + rorxl $6, %r12d, %edx + addl %eax, %r8d + vmovdqu %ymm4, 480(%rsp) + xorl %eax, %eax + xorl %ecx, %ecx + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 384(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 388(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 392(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, 
%eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 396(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 416(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 420(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 424(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 428(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 448(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 452(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, 
%edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 456(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 460(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 480(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 484(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 488(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 492(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + addl %eax, %r8d + xorl %ecx, %ecx + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + movl %r8d, (%rdi) + movl %r9d, 4(%rdi) + movl %r10d, 8(%rdi) + movl %r11d, 12(%rdi) + movl %r12d, 16(%rdi) + movl %r13d, 
20(%rdi) + movl %r14d, 24(%rdi) + movl %r15d, 28(%rdi) + movl %r9d, %ebx + xorl %eax, %eax + xorl %r10d, %ebx + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 16(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 20(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 24(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 28(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 48(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 52(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 56(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + 
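+ # Second interleaved block: the same 64 rounds, reading the w+K copies
+ # stored 16 bytes above the first block's within each 32-byte stack slot.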
xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 60(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 80(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 84(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 88(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 92(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 112(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 116(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + 
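+ # Per round (FIPS 180-4): T1 = h + Sigma1(e) + Ch(e,f,g) + w[i] + K[i],
+ # T2 = Sigma0(a) + Maj(a,b,c); the eight state words then rotate with
+ # e = d + T1 and a = T1 + T2.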
addl 120(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 124(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 144(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 148(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 152(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 156(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 176(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 180(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + 
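+ # Ch(e,f,g) is built as ((f ^ g) & e) ^ g in %eax/%ebx; each
+ # 'addl N(%rsp)' adds the precomputed w[i]+K[i] for that round.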
xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 184(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 188(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 208(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 212(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 216(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 220(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 240(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl 
$13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 244(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 248(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 252(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 272(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 276(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 280(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 284(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + 
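+ # Maj(a,b,c) = ((a ^ b) & (b ^ c)) ^ b; each round's a^b is kept in
+ # %eax/%ebx and reused as the next round's b^c, saving an instruction.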
xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 304(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 308(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 312(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 316(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 336(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 340(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 344(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, 
%ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 348(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 368(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 372(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 376(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 380(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 400(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 404(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal 
(%r14,%rax,1), %r14d + addl 408(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 412(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 432(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 436(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 440(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 444(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + rorxl $6, %r12d, %edx + rorxl $11, %r12d, %ecx + leal (%r8,%rax,1), %r8d + addl 464(%rsp), %r15d + movl %r13d, %eax + xorl %edx, %ecx + xorl %r14d, %eax + rorxl $25, %r12d, %edx + xorl %ecx, %edx + andl %r12d, %eax + addl %edx, %r15d + rorxl $2, %r8d, %edx + rorxl $13, %r8d, %ecx + xorl %r14d, %eax + xorl %edx, %ecx + rorxl $22, %r8d, %edx + addl %eax, %r15d + xorl %ecx, %edx + movl %r9d, %eax + addl %r15d, %r11d + xorl %r8d, %eax + andl %eax, %ebx + addl %edx, %r15d + xorl %r9d, %ebx + rorxl $6, %r11d, %edx + rorxl $11, %r11d, %ecx + addl %ebx, %r15d + addl 468(%rsp), %r14d + movl %r12d, %ebx + xorl %edx, %ecx + xorl %r13d, %ebx + 
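+ # The rounds below finish the second block; the loop tail then advances
+ # the input pointer (%rbp) by 0x80 (two 64-byte blocks), subtracts 0x80
+ # from the remaining length in %esi, and branches back to
+ # L_sha256_len_avx2_rorx_start while it is non-zero.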
rorxl $25, %r11d, %edx + xorl %ecx, %edx + andl %r11d, %ebx + addl %edx, %r14d + rorxl $2, %r15d, %edx + rorxl $13, %r15d, %ecx + xorl %r13d, %ebx + xorl %edx, %ecx + rorxl $22, %r15d, %edx + addl %ebx, %r14d + xorl %ecx, %edx + movl %r8d, %ebx + leal (%r10,%r14,1), %r10d + xorl %r15d, %ebx + andl %ebx, %eax + addl %edx, %r14d + xorl %r8d, %eax + rorxl $6, %r10d, %edx + rorxl $11, %r10d, %ecx + leal (%r14,%rax,1), %r14d + addl 472(%rsp), %r13d + movl %r11d, %eax + xorl %edx, %ecx + xorl %r12d, %eax + rorxl $25, %r10d, %edx + xorl %ecx, %edx + andl %r10d, %eax + addl %edx, %r13d + rorxl $2, %r14d, %edx + rorxl $13, %r14d, %ecx + xorl %r12d, %eax + xorl %edx, %ecx + rorxl $22, %r14d, %edx + addl %eax, %r13d + xorl %ecx, %edx + movl %r15d, %eax + addl %r13d, %r9d + xorl %r14d, %eax + andl %eax, %ebx + addl %edx, %r13d + xorl %r15d, %ebx + rorxl $6, %r9d, %edx + rorxl $11, %r9d, %ecx + addl %ebx, %r13d + addl 476(%rsp), %r12d + movl %r10d, %ebx + xorl %edx, %ecx + xorl %r11d, %ebx + rorxl $25, %r9d, %edx + xorl %ecx, %edx + andl %r9d, %ebx + addl %edx, %r12d + rorxl $2, %r13d, %edx + rorxl $13, %r13d, %ecx + xorl %r11d, %ebx + xorl %edx, %ecx + rorxl $22, %r13d, %edx + addl %ebx, %r12d + xorl %ecx, %edx + movl %r14d, %ebx + leal (%r8,%r12,1), %r8d + xorl %r13d, %ebx + andl %ebx, %eax + addl %edx, %r12d + xorl %r14d, %eax + rorxl $6, %r8d, %edx + rorxl $11, %r8d, %ecx + leal (%r12,%rax,1), %r12d + addl 496(%rsp), %r11d + movl %r9d, %eax + xorl %edx, %ecx + xorl %r10d, %eax + rorxl $25, %r8d, %edx + xorl %ecx, %edx + andl %r8d, %eax + addl %edx, %r11d + rorxl $2, %r12d, %edx + rorxl $13, %r12d, %ecx + xorl %r10d, %eax + xorl %edx, %ecx + rorxl $22, %r12d, %edx + addl %eax, %r11d + xorl %ecx, %edx + movl %r13d, %eax + addl %r11d, %r15d + xorl %r12d, %eax + andl %eax, %ebx + addl %edx, %r11d + xorl %r13d, %ebx + rorxl $6, %r15d, %edx + rorxl $11, %r15d, %ecx + addl %ebx, %r11d + addl 500(%rsp), %r10d + movl %r8d, %ebx + xorl %edx, %ecx + xorl %r9d, %ebx + rorxl $25, %r15d, %edx + xorl %ecx, %edx + andl %r15d, %ebx + addl %edx, %r10d + rorxl $2, %r11d, %edx + rorxl $13, %r11d, %ecx + xorl %r9d, %ebx + xorl %edx, %ecx + rorxl $22, %r11d, %edx + addl %ebx, %r10d + xorl %ecx, %edx + movl %r12d, %ebx + leal (%r14,%r10,1), %r14d + xorl %r11d, %ebx + andl %ebx, %eax + addl %edx, %r10d + xorl %r12d, %eax + rorxl $6, %r14d, %edx + rorxl $11, %r14d, %ecx + leal (%r10,%rax,1), %r10d + addl 504(%rsp), %r9d + movl %r15d, %eax + xorl %edx, %ecx + xorl %r8d, %eax + rorxl $25, %r14d, %edx + xorl %ecx, %edx + andl %r14d, %eax + addl %edx, %r9d + rorxl $2, %r10d, %edx + rorxl $13, %r10d, %ecx + xorl %r8d, %eax + xorl %edx, %ecx + rorxl $22, %r10d, %edx + addl %eax, %r9d + xorl %ecx, %edx + movl %r11d, %eax + addl %r9d, %r13d + xorl %r10d, %eax + andl %eax, %ebx + addl %edx, %r9d + xorl %r11d, %ebx + rorxl $6, %r13d, %edx + rorxl $11, %r13d, %ecx + addl %ebx, %r9d + addl 508(%rsp), %r8d + movl %r14d, %ebx + xorl %edx, %ecx + xorl %r15d, %ebx + rorxl $25, %r13d, %edx + xorl %ecx, %edx + andl %r13d, %ebx + addl %edx, %r8d + rorxl $2, %r9d, %edx + rorxl $13, %r9d, %ecx + xorl %r15d, %ebx + xorl %edx, %ecx + rorxl $22, %r9d, %edx + addl %ebx, %r8d + xorl %ecx, %edx + movl %r10d, %ebx + leal (%r12,%r8,1), %r12d + xorl %r9d, %ebx + andl %ebx, %eax + addl %edx, %r8d + xorl %r10d, %eax + addl %eax, %r8d + addq $0x80, %rbp + addl (%rdi), %r8d + addl 4(%rdi), %r9d + addl 8(%rdi), %r10d + addl 12(%rdi), %r11d + addl 16(%rdi), %r12d + addl 20(%rdi), %r13d + addl 24(%rdi), %r14d + addl 28(%rdi), %r15d + subl $0x80, %esi + movl 
%r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ jnz L_sha256_len_avx2_rorx_start
+L_sha256_len_avx2_rorx_done:
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x200, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */