summaryrefslogtreecommitdiff
path: root/x86_64
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2022-07-05 19:47:39 +0200
committer Niels Möller <nisse@lysator.liu.se> 2022-07-05 19:47:39 +0200
commit fad1bf50a5087a5b88a5500965ba3959d11f997f (patch)
tree 428299e99c30bb24ce2bf048e5a887ac2dbc8eca /x86_64
parent 12bb2223428be4326c580b1b1f8e9916a2839cb1 (diff)
download nettle-fad1bf50a5087a5b88a5500965ba3959d11f997f.tar.gz
Implement _nettle_sha256_compress_n, C and x86_64 asm
Diffstat (limited to 'x86_64')
-rw-r--r-- x86_64/fat/sha256-compress-n-2.asm (renamed from x86_64/fat/sha256-compress-2.asm) 4
-rw-r--r-- x86_64/fat/sha256-compress-n.asm (renamed from x86_64/fat/sha256-compress.asm) 4
-rw-r--r-- x86_64/sha256-compress-n.asm (renamed from x86_64/sha256-compress.asm) 85
-rw-r--r-- x86_64/sha_ni/sha256-compress-n.asm (renamed from x86_64/sha_ni/sha256-compress.asm) 42
4 files changed, 85 insertions, 50 deletions
diff --git a/x86_64/fat/sha256-compress-2.asm b/x86_64/fat/sha256-compress-n-2.asm
index 996cf8c5..60f7c8f6 100644
--- a/x86_64/fat/sha256-compress-2.asm
+++ b/x86_64/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress-2.asm
+C x86_64/fat/sha256-compress-n-2.asm
ifelse(`
Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
')
define(`fat_transform', `$1_sha_ni')
-include_src(`x86_64/sha_ni/sha256-compress.asm')
+include_src(`x86_64/sha_ni/sha256-compress-n.asm')
diff --git a/x86_64/fat/sha256-compress.asm b/x86_64/fat/sha256-compress-n.asm
index 2aaeb5e8..fc358858 100644
--- a/x86_64/fat/sha256-compress.asm
+++ b/x86_64/fat/sha256-compress-n.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress.asm
+C x86_64/fat/sha256-compress-n.asm
ifelse(`
Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
')
define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/sha256-compress.asm')
+include_src(`x86_64/sha256-compress-n.asm')
diff --git a/x86_64/sha256-compress.asm b/x86_64/sha256-compress-n.asm
index 5ed669b1..e10d260c 100644
--- a/x86_64/sha256-compress.asm
+++ b/x86_64/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha256-compress.asm
+C x86_64/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2013 Niels Möller
+ Copyright (C) 2013, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,21 +30,24 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi') C Argument 2 (k)
+define(`BLOCKS', `%rdx') C Argument 3 (blocks), counted down to zero
+define(`INPUT', `%rcx') C Argument 4 (input), advanced by 64 per block
+define(`STATE_SAVED', `64(%rsp)') C Stack slot holding the state pointer while its register is reused
+
define(`SA', `%eax')
define(`SB', `%ebx')
-define(`SC', `%ecx')
+define(`SC', `%ebp') C Was %ecx, which now holds INPUT
define(`SD', `%r8d')
define(`SE', `%r9d')
define(`SF', `%r10d')
define(`SG', `%r11d')
define(`SH', `%r12d')
define(`T0', `%r13d')
-define(`T1', `%edi') C Overlap STATE
-define(`COUNT', `%r14')
+define(`T1', `%r14d') C Was %edi; %rdi is now shared by STATE and COUNT
+define(`COUNT', `%rdi') C Overlap STATE
define(`W', `%r15d')
define(`EXPN', `
@@ -123,18 +126,21 @@ define(`NOEXPN', `
movl W, OFFSET($1)(%rsp, COUNT, 4)
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
.text
ALIGN(16)
-PROLOGUE(_nettle_sha256_compress)
+PROLOGUE(_nettle_sha256_compress_n)
- W64_ENTRY(3, 0)
+ W64_ENTRY(4, 0) C Four arguments: state, k, blocks, input
+ test BLOCKS, BLOCKS C Zero blocks: nothing to hash,
+ jz .Lend C just return INPUT unchanged
sub $120, %rsp
- mov %rbx, 64(%rsp)
- mov STATE, 72(%rsp) C Save state, to free a register
+ mov STATE, STATE_SAVED C Save state, to free a register
+ mov %rbx, 72(%rsp)
mov %rbp, 80(%rsp)
mov %r12, 88(%rsp)
mov %r13, 96(%rsp)
@@ -149,7 +155,9 @@ PROLOGUE(_nettle_sha256_compress)
movl 20(STATE), SF
movl 24(STATE), SG
movl 28(STATE), SH
- xor COUNT, COUNT
+
+.Loop_block:
+ xorl XREG(COUNT), XREG(COUNT) C Restart the round counter for each block; COUNT overlaps STATE
ALIGN(16)
.Loop1:
@@ -161,8 +169,8 @@ PROLOGUE(_nettle_sha256_compress)
NOEXPN(5) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,5)
NOEXPN(6) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,6)
NOEXPN(7) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,7)
- add $8, COUNT
- cmp $16, COUNT
+ addl $8, XREG(COUNT)
+ cmpl $16, XREG(COUNT)
jne .Loop1
.Loop2:
@@ -182,22 +190,35 @@ PROLOGUE(_nettle_sha256_compress)
EXPN(13) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,13)
EXPN(14) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,14)
EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,15)
- add $16, COUNT
- cmp $64, COUNT
+ addl $16, XREG(COUNT)
+ cmpl $64, XREG(COUNT)
jne .Loop2
- mov 72(%rsp), STATE
-
- addl SA, (STATE)
- addl SB, 4(STATE)
- addl SC, 8(STATE)
- addl SD, 12(STATE)
- addl SE, 16(STATE)
- addl SF, 20(STATE)
- addl SG, 24(STATE)
- addl SH, 28(STATE)
-
- mov 64(%rsp), %rbx
+ mov STATE_SAVED, STATE C COUNT reused this register, reload the state pointer
+
+ addl (STATE), SA C Add the previous state into the working registers ...
+ addl 4(STATE), SB
+ addl 8(STATE), SC
+ addl 12(STATE), SD
+ addl 16(STATE), SE
+ addl 20(STATE), SF
+ addl 24(STATE), SG
+ addl 28(STATE), SH
+
+ movl SA, (STATE) C ... and store the sums back as the new state
+ movl SB, 4(STATE)
+ movl SC, 8(STATE)
+ movl SD, 12(STATE)
+ movl SE, 16(STATE)
+ movl SF, 20(STATE)
+ movl SG, 24(STATE)
+ movl SH, 28(STATE)
+
+ add $64, INPUT C Advance to the next 64-byte block
+ dec BLOCKS
+ jnz .Loop_block C SA-SH already hold the updated state, no reload needed
+
+ mov 72(%rsp), %rbx
mov 80(%rsp), %rbp
mov 88(%rsp), %r12
mov 96(%rsp), %r13
@@ -205,6 +226,8 @@ PROLOGUE(_nettle_sha256_compress)
mov 112(%rsp),%r15
add $120, %rsp
+.Lend:
+ mov INPUT, %rax C Return the input pointer, advanced past every block processed
- W64_EXIT(3, 0)
+ W64_EXIT(4, 0) C Argument count must match W64_ENTRY
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress-n.asm
index 00bd3cd3..005909df 100644
--- a/x86_64/sha_ni/sha256-compress.asm
+++ b/x86_64/sha_ni/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha_ni/sha256-compress.asm
+C x86_64/sha_ni/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2018 Niels Möller
+ Copyright (C) 2018, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,10 +30,11 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi') C Argument 2 (k)
+define(`BLOCKS', `%rdx') C Argument 3 (blocks), counted down to zero
+define(`INPUT', `%rcx') C Argument 4 (input), advanced by 64 per block
define(`MSGK',`%xmm0') C Implicit operand of sha256rnds2
define(`MSG0',`%xmm1')
@@ -45,7 +46,7 @@ define(`CDGH',`%xmm6')
define(`ABEF_ORIG',`%xmm7')
define(`CDGH_ORIG', `%xmm8')
define(`SWAP_MASK',`%xmm9')
-define(`TMP', `%xmm9') C Overlaps SWAP_MASK
+define(`TMP', `%xmm10') C Own register; previously shared %xmm9 with SWAP_MASK
C QROUND(M0, M1, M2, M3, R)
define(`QROUND', `
@@ -69,15 +70,19 @@ define(`TRANSPOSE', `
punpcklqdq $1, $3
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
.text
ALIGN(16)
.Lswap_mask:
.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
-PROLOGUE(_nettle_sha256_compress)
- W64_ENTRY(3, 10)
+PROLOGUE(_nettle_sha256_compress_n)
+ W64_ENTRY(4, 11) C Four arguments; 11 presumably counts xmm0-xmm10 in use (TMP is now xmm10) - confirm against the macro definition
+ test BLOCKS, BLOCKS C Zero blocks: skip all work,
+ jz .Lend C return INPUT unchanged
+
movups (STATE), TMP
movups 16(STATE), ABEF
@@ -88,12 +93,13 @@ PROLOGUE(_nettle_sha256_compress)
movdqa .Lswap_mask(%rip), SWAP_MASK
- movdqa ABEF, ABEF_ORIG
- movdqa CDGH, CDGH_ORIG
-
+.Loop:
movups (INPUT), MSG0
pshufb SWAP_MASK, MSG0
+ movdqa ABEF, ABEF_ORIG C Snapshot the state at the start of every block;
+ movdqa CDGH, CDGH_ORIG C it is added back in after the rounds
+
movdqa (K), MSGK
paddd MSG0, MSGK
sha256rnds2 ABEF, CDGH C Round 0-1
@@ -163,6 +169,10 @@ PROLOGUE(_nettle_sha256_compress)
paddd ABEF_ORIG, ABEF
paddd CDGH_ORIG, CDGH
+ add $64, INPUT C Advance to the next 64-byte block
+ dec BLOCKS
+ jnz .Loop C State stays in ABEF and CDGH between blocks
+
TRANSPOSE(ABEF, CDGH, TMP)
pshufd $0x1b, CDGH, CDGH
@@ -170,6 +180,8 @@ PROLOGUE(_nettle_sha256_compress)
movups CDGH, 0(STATE)
movups TMP, 16(STATE)
- W64_EXIT(3, 10)
+.Lend:
+ mov INPUT, %rax C Return the input pointer, advanced past every block processed
+ W64_EXIT(4, 11) C Same counts as W64_ENTRY
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)