diff options
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/sha256-avx2-asm.S | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 3eada9416852..e2a4024fb0a3 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -589,19 +589,23 @@ last_block_enter: .align 16 loop1: - vpaddd K256+0*32(SRND), X0, XFER + leaq K256+0*32(%rip), INP ## reuse INP as scratch reg + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 0*32 - vpaddd K256+1*32(SRND), X0, XFER + leaq K256+1*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 1*32 - vpaddd K256+2*32(SRND), X0, XFER + leaq K256+2*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 2*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 2*32 - vpaddd K256+3*32(SRND), X0, XFER + leaq K256+3*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 3*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 3*32 @@ -611,11 +615,13 @@ loop1: loop2: ## Do last 16 rounds with no scheduling - vpaddd K256+0*32(SRND), X0, XFER + leaq K256+0*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 0*32 - vpaddd K256+1*32(SRND), X1, XFER + leaq K256+1*32(%rip), INP + vpaddd (INP, SRND), X1, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 1*32 add $2*32, SRND |