diff options
author | Niels Möller <nisse@lysator.liu.se> | 2022-07-05 19:47:39 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2022-07-05 19:47:39 +0200 |
commit | fad1bf50a5087a5b88a5500965ba3959d11f997f (patch) | |
tree | 428299e99c30bb24ce2bf048e5a887ac2dbc8eca /x86_64 | |
parent | 12bb2223428be4326c580b1b1f8e9916a2839cb1 (diff) | |
download | nettle-fad1bf50a5087a5b88a5500965ba3959d11f997f.tar.gz |
Implement _nettle_sha256_compress_n, C and x86_64 asm
Diffstat (limited to 'x86_64')
-rw-r--r-- | x86_64/fat/sha256-compress-n-2.asm (renamed from x86_64/fat/sha256-compress-2.asm) | 4 |
-rw-r--r-- | x86_64/fat/sha256-compress-n.asm (renamed from x86_64/fat/sha256-compress.asm) | 4 |
-rw-r--r-- | x86_64/sha256-compress-n.asm (renamed from x86_64/sha256-compress.asm) | 85 |
-rw-r--r-- | x86_64/sha_ni/sha256-compress-n.asm (renamed from x86_64/sha_ni/sha256-compress.asm) | 42 |
4 files changed, 85 insertions, 50 deletions
```diff
diff --git a/x86_64/fat/sha256-compress-2.asm b/x86_64/fat/sha256-compress-n-2.asm
index 996cf8c5..60f7c8f6 100644
--- a/x86_64/fat/sha256-compress-2.asm
+++ b/x86_64/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress-2.asm
+C x86_64/fat/sha256-compress-n-2.asm
 
 ifelse(`
    Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
 ')
 
 define(`fat_transform', `$1_sha_ni')
-include_src(`x86_64/sha_ni/sha256-compress.asm')
+include_src(`x86_64/sha_ni/sha256-compress-n.asm')
diff --git a/x86_64/fat/sha256-compress.asm b/x86_64/fat/sha256-compress-n.asm
index 2aaeb5e8..fc358858 100644
--- a/x86_64/fat/sha256-compress.asm
+++ b/x86_64/fat/sha256-compress-n.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress.asm
+C x86_64/fat/sha256-compress-n.asm
 
 ifelse(`
    Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
 ')
 
 define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/sha256-compress.asm')
+include_src(`x86_64/sha256-compress-n.asm')
diff --git a/x86_64/sha256-compress.asm b/x86_64/sha256-compress-n.asm
index 5ed669b1..e10d260c 100644
--- a/x86_64/sha256-compress.asm
+++ b/x86_64/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha256-compress.asm
+C x86_64/sha256-compress-n.asm
 
 ifelse(`
-   Copyright (C) 2013 Niels Möller
+   Copyright (C) 2013, 2022 Niels Möller
 
    This file is part of GNU Nettle.
 
@@ -30,21 +30,24 @@ ifelse(`
    not, see http://www.gnu.org/licenses/.
 ')
 
-	.file "sha256-compress.asm"
+	.file "sha256-compress-n.asm"
 define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi')
+define(`BLOCKS', `%rdx')
+define(`INPUT', `%rcx')
+define(`STATE_SAVED', `64(%rsp)')
+
 define(`SA', `%eax')
 define(`SB', `%ebx')
-define(`SC', `%ecx')
+define(`SC', `%ebp')
 define(`SD', `%r8d')
 define(`SE', `%r9d')
 define(`SF', `%r10d')
 define(`SG', `%r11d')
 define(`SH', `%r12d')
 define(`T0', `%r13d')
-define(`T1', `%edi')	C Overlap STATE
-define(`COUNT', `%r14')
+define(`T1', `%r14d')
+define(`COUNT', `%rdi')	C Overlap STATE
 define(`W', `%r15d')
 
 define(`EXPN', `
@@ -123,18 +126,21 @@ define(`NOEXPN', `
 	movl	W, OFFSET($1)(%rsp, COUNT, 4)
 ')
 
-	C void
-	C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+	C const uint8_t *
+	C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+	C                           size_t blocks, const uint8_t *input)
 
 	.text
 	ALIGN(16)
 
-PROLOGUE(_nettle_sha256_compress)
+PROLOGUE(_nettle_sha256_compress_n)
 	W64_ENTRY(3, 0)
+	test	BLOCKS, BLOCKS
+	jz	.Lend
 
 	sub	$120, %rsp
-	mov	%rbx, 64(%rsp)
-	mov	STATE, 72(%rsp)	C Save state, to free a register
+	mov	STATE, STATE_SAVED	C Save state, to free a register
+	mov	%rbx, 72(%rsp)
 	mov	%rbp, 80(%rsp)
 	mov	%r12, 88(%rsp)
 	mov	%r13, 96(%rsp)
@@ -149,7 +155,9 @@ PROLOGUE(_nettle_sha256_compress)
 	movl	20(STATE), SF
 	movl	24(STATE), SG
 	movl	28(STATE), SH
-	xor	COUNT, COUNT
+
+.Loop_block:
+	xorl	XREG(COUNT), XREG(COUNT)
 	ALIGN(16)
 
 .Loop1:
@@ -161,8 +169,8 @@ PROLOGUE(_nettle_sha256_compress)
 	NOEXPN(5) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,5)
 	NOEXPN(6) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,6)
 	NOEXPN(7) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,7)
-	add	$8, COUNT
-	cmp	$16, COUNT
+	addl	$8, XREG(COUNT)
+	cmpl	$16, XREG(COUNT)
 	jne	.Loop1
 
 .Loop2:
@@ -182,22 +190,35 @@ PROLOGUE(_nettle_sha256_compress)
 	EXPN(13) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,13)
 	EXPN(14) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,14)
 	EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,15)
-	add	$16, COUNT
-	cmp	$64, COUNT
+	addl	$16, XREG(COUNT)
+	cmpl	$64, XREG(COUNT)
 	jne	.Loop2
 
-	mov	72(%rsp), STATE
-
-	addl	SA, (STATE)
-	addl	SB, 4(STATE)
-	addl	SC, 8(STATE)
-	addl	SD, 12(STATE)
-	addl	SE, 16(STATE)
-	addl	SF, 20(STATE)
-	addl	SG, 24(STATE)
-	addl	SH, 28(STATE)
-
-	mov	64(%rsp), %rbx
+	mov	STATE_SAVED, STATE
+
+	addl	(STATE), SA
+	addl	4(STATE), SB
+	addl	8(STATE), SC
+	addl	12(STATE), SD
+	addl	16(STATE), SE
+	addl	20(STATE), SF
+	addl	24(STATE), SG
+	addl	28(STATE), SH
+
+	movl	SA, (STATE)
+	movl	SB, 4(STATE)
+	movl	SC, 8(STATE)
+	movl	SD, 12(STATE)
+	movl	SE, 16(STATE)
+	movl	SF, 20(STATE)
+	movl	SG, 24(STATE)
+	movl	SH, 28(STATE)
+
+	add	$64, INPUT
+	dec	BLOCKS
+	jnz	.Loop_block
+
+	mov	72(%rsp), %rbx
 	mov	80(%rsp), %rbp
 	mov	88(%rsp), %r12
 	mov	96(%rsp), %r13
@@ -205,6 +226,8 @@ PROLOGUE(_nettle_sha256_compress)
 	mov	112(%rsp),%r15
 
 	add	$120, %rsp
+.Lend:
+	mov	INPUT, %rax
 	W64_EXIT(3, 0)
 	ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress-n.asm
index 00bd3cd3..005909df 100644
--- a/x86_64/sha_ni/sha256-compress.asm
+++ b/x86_64/sha_ni/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha_ni/sha256-compress.asm
+C x86_64/sha_ni/sha256-compress-n.asm
 
 ifelse(`
-   Copyright (C) 2018 Niels Möller
+   Copyright (C) 2018, 2022 Niels Möller
 
    This file is part of GNU Nettle.
 
@@ -30,10 +30,11 @@ ifelse(`
    not, see http://www.gnu.org/licenses/.
 ')
 
-	.file "sha256-compress.asm"
+	.file "sha256-compress-n.asm"
 define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi')
+define(`BLOCKS', `%rdx')
+define(`INPUT', `%rcx')
 
 define(`MSGK',`%xmm0')	C Implicit operand of sha256rnds2
 define(`MSG0',`%xmm1')
@@ -45,7 +46,7 @@ define(`CDGH',`%xmm6')
 define(`ABEF_ORIG',`%xmm7')
 define(`CDGH_ORIG', `%xmm8')
 define(`SWAP_MASK',`%xmm9')
-define(`TMP', `%xmm9')	C Overlaps SWAP_MASK
+define(`TMP', `%xmm10')
 
 C QROUND(M0, M1, M2, M3, R)
 define(`QROUND', `
@@ -69,15 +70,19 @@ define(`TRANSPOSE', `
 	punpcklqdq	$1, $3
 ')
 
-	C void
-	C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+	C const uint8_t *
+	C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+	C                           size_t blocks, const uint8_t *input)
 
 	.text
 	ALIGN(16)
 .Lswap_mask:
 	.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
-PROLOGUE(_nettle_sha256_compress)
-	W64_ENTRY(3, 10)
+PROLOGUE(_nettle_sha256_compress_n)
+	W64_ENTRY(4, 11)
+	test	BLOCKS, BLOCKS
+	jz	.Lend
+
 	movups	(STATE), TMP
 	movups	16(STATE), ABEF
 
@@ -88,12 +93,13 @@ PROLOGUE(_nettle_sha256_compress)
 
 	movdqa	.Lswap_mask(%rip), SWAP_MASK
 
-	movdqa	ABEF, ABEF_ORIG
-	movdqa	CDGH, CDGH_ORIG
-
+.Loop:
 	movups	(INPUT), MSG0
 	pshufb	SWAP_MASK, MSG0
 
+	movdqa	ABEF, ABEF_ORIG
+	movdqa	CDGH, CDGH_ORIG
+
 	movdqa	(K), MSGK
 	paddd	MSG0, MSGK
 	sha256rnds2 ABEF, CDGH		C Round 0-1
@@ -163,6 +169,10 @@ PROLOGUE(_nettle_sha256_compress)
 	paddd ABEF_ORIG, ABEF
 	paddd CDGH_ORIG, CDGH
 
+	add	$64, INPUT
+	dec	BLOCKS
+	jnz	.Loop
+
 	TRANSPOSE(ABEF, CDGH, TMP)
 
 	pshufd	$0x1b, CDGH, CDGH
@@ -170,6 +180,8 @@ PROLOGUE(_nettle_sha256_compress)
 	movups	CDGH, 0(STATE)
 	movups	TMP, 16(STATE)
 
-	W64_EXIT(3, 10)
+.Lend:
+	mov	INPUT, %rax
+	W64_EXIT(4, 11)
 	ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
```