author     Niels Möller <nisse@lysator.liu.se>  2018-02-21 23:04:44 +0100
committer  Niels Möller <nisse@lysator.liu.se>  2018-02-21 23:04:44 +0100
commit     34ea00b47003939fe1ea616f2e57067cea2c7c8f (patch)
tree       8f7fc8cfe97b7a4b24a182345298496d774e62fc /x86_64
parent     b908c40a2dcffb6cbab66360c3c1a6a554faac9d (diff)
download   nettle-34ea00b47003939fe1ea616f2e57067cea2c7c8f.tar.gz
New sha256 implementation using sha_ni instructions.
Diffstat (limited to 'x86_64')
-rw-r--r--  x86_64/sha_ni/sha256-compress.asm  | 175
1 file changed, 175 insertions(+), 0 deletions(-)
diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress.asm
new file mode 100644
index 00000000..f2a4bd32
--- /dev/null
+++ b/x86_64/sha_ni/sha256-compress.asm
@@ -0,0 +1,175 @@
+C x86_64/sha_ni/sha256-compress.asm
+
+ifelse(<
+ Copyright (C) 2018 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+>)
+
+ .file "sha256-compress.asm"
+define(<STATE>, <%rdi>)
+define(<INPUT>, <%rsi>)
+define(<K>, <%rdx>)
+
+define(<MSGK>,<%xmm0>) C Implicit operand of sha256rnds2
+define(<MSG0>,<%xmm1>)
+define(<MSG1>,<%xmm2>)
+define(<MSG2>,<%xmm3>)
+define(<MSG3>,<%xmm4>)
+define(<ABEF>,<%xmm5>)
+define(<CDGH>,<%xmm6>)
+define(<ABEF_ORIG>,<%xmm7>)
+define(<CDGH_ORIG>, <%xmm8>)
+define(<SWAP_MASK>,<%xmm9>)
+define(<TMP>, <%xmm9>) C Overlaps SWAP_MASK
+
+C QROUND(M0, M1, M2, M3, R)
+define(<QROUND>, <
+ movdqa eval($5*4)(K), MSGK
+ paddd $1, MSGK
+ sha256rnds2 ABEF, CDGH
+ pshufd <$>0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF
+ movdqa $1, TMP
+ palignr <$>4, $4, TMP
+ paddd TMP, $2
+ sha256msg2 $1, $2
+ sha256msg1 $1, $4
+ >)
+
+C FIXME: Do something more clever, taking the pshufd into account.
+C TRANSPOSE(ABCD, EFGH, scratch) --> untouched, ABEF, CDGH
+define(<TRANSPOSE>, <
+ movdqa $2, $3
+ punpckhqdq $1, $2
+ punpcklqdq $1, $3
+>)
+
+ C void
+ C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+
+ .text
+ ALIGN(16)
+.Lswap_mask:
+ .byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
+PROLOGUE(_nettle_sha256_compress)
+ W64_ENTRY(3, 10)
+ movups (STATE), TMP
+ movups 16(STATE), ABEF
+
+ pshufd $0x1b, TMP, TMP
+ pshufd $0x1b, ABEF, ABEF
+
+ TRANSPOSE(TMP, ABEF, CDGH)
+
+ movdqa .Lswap_mask(%rip), SWAP_MASK
+
+ movdqa ABEF, ABEF_ORIG
+ movdqa CDGH, CDGH_ORIG
+
+ movups (INPUT), MSG0
+ pshufb SWAP_MASK, MSG0
+
+ movdqa (K), MSGK
+ paddd MSG0, MSGK
+ sha256rnds2 ABEF, CDGH C Round 0-1
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 2-3
+
+ movups 16(INPUT), MSG1
+ pshufb SWAP_MASK, MSG1
+
+ movdqa 16(K), MSGK
+ paddd MSG1, MSGK
+ sha256rnds2 ABEF, CDGH C Round 4-5
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 6-7
+ sha256msg1 MSG1, MSG0
+
+ movups 32(INPUT), MSG2
+ pshufb SWAP_MASK, MSG2
+
+ movdqa 32(K), MSGK
+ paddd MSG2, MSGK
+ sha256rnds2 ABEF, CDGH C Round 8-9
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 10-11
+ sha256msg1 MSG2, MSG1
+
+ movups 48(INPUT), MSG3
+ pshufb SWAP_MASK, MSG3
+
+ QROUND(MSG3, MSG0, MSG1, MSG2, 12) C Round 12-15
+ QROUND(MSG0, MSG1, MSG2, MSG3, 16)
+ QROUND(MSG1, MSG2, MSG3, MSG0, 20)
+ QROUND(MSG2, MSG3, MSG0, MSG1, 24)
+ QROUND(MSG3, MSG0, MSG1, MSG2, 28)
+ QROUND(MSG0, MSG1, MSG2, MSG3, 32)
+ QROUND(MSG1, MSG2, MSG3, MSG0, 36)
+ QROUND(MSG2, MSG3, MSG0, MSG1, 40)
+ QROUND(MSG3, MSG0, MSG1, MSG2, 44)
+ QROUND(MSG0, MSG1, MSG2, MSG3, 48)
+
+ movdqa 208(K), MSGK
+ paddd MSG1, MSGK
+ sha256rnds2 ABEF, CDGH C Round 52-53
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 54-55
+ movdqa MSG1, TMP
+ palignr $4, MSG0, TMP
+ paddd TMP, MSG2
+ sha256msg2 MSG1, MSG2
+
+ movdqa 224(K), MSGK
+ paddd MSG2, MSGK
+ sha256rnds2 ABEF, CDGH C Round 56-57
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 58-59
+ movdqa MSG2, TMP
+ palignr $4, MSG1, TMP
+ paddd TMP, MSG3
+ sha256msg2 MSG2, MSG3
+
+ movdqa 240(K), MSGK
+ paddd MSG3, MSGK
+ sha256rnds2 ABEF, CDGH C Round 60-61
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 62-63
+
+ paddd ABEF_ORIG, ABEF
+ paddd CDGH_ORIG, CDGH
+
+ TRANSPOSE(ABEF, CDGH, TMP)
+
+ pshufd $0x1b, CDGH, CDGH
+ pshufd $0x1b, TMP, TMP
+ movups CDGH, 0(STATE)
+ movups TMP, 16(STATE)
+
+ W64_EXIT(3, 10)
+ ret
+EPILOGUE(_nettle_sha256_compress)
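
For reference, the state layout and the four-round pattern used by the QROUND macro above translate roughly into the C intrinsics sketch below. This is an illustrative assumption, not Nettle code: the function and variable names are invented, and only the three SHA-NI intrinsics (_mm_sha256rnds2_epu32, _mm_sha256msg1_epu32, _mm_sha256msg2_epu32) together with the round and message-schedule structure mirror the assembly. Building it would need SHA-NI support enabled in the compiler, e.g. something like gcc -O2 -msha -mssse3.

/* sha_ni_sketch.c -- illustrative only, not part of Nettle. */
#include <immintrin.h>
#include <stdint.h>

/* Load state[0..7] = {a,b,c,d,e,f,g,h} into the word order that
   sha256rnds2 expects; mirrors the movups/pshufd/TRANSPOSE sequence
   at the start of the assembly routine.  Comments show dword order,
   lowest first. */
static inline void
load_state_sketch (const uint32_t *state, __m128i *abef, __m128i *cdgh)
{
  __m128i abcd = _mm_loadu_si128 ((const __m128i *) state);        /* a b c d */
  __m128i efgh = _mm_loadu_si128 ((const __m128i *) (state + 4));  /* e f g h */
  abcd = _mm_shuffle_epi32 (abcd, 0x1b);                           /* d c b a */
  efgh = _mm_shuffle_epi32 (efgh, 0x1b);                           /* h g f e */
  *abef = _mm_unpackhi_epi64 (efgh, abcd);                         /* f e b a */
  *cdgh = _mm_unpacklo_epi64 (efgh, abcd);                         /* h g d c */
}

/* Rounds r .. r+3 interleaved with one message-schedule step,
   corresponding to QROUND(M0, M1, M2, M3, R) above (M2 is not used by
   the macro, so it is omitted here).  On entry *m0 holds W[r..r+3],
   *m1 holds the sha256msg1 partial result that becomes W[r+4..r+7],
   and *m3 holds W[r-4..r-1]. */
static inline void
qround_sketch (__m128i *abef, __m128i *cdgh,
               __m128i *m0, __m128i *m1, __m128i *m3,
               const uint32_t *k, unsigned r)
{
  /* wk = W[r..r+3] + K[r..r+3] */
  __m128i wk = _mm_add_epi32 (*m0, _mm_loadu_si128 ((const __m128i *) (k + r)));
  __m128i tmp;

  /* Rounds r, r+1: the result is the new (A,B,E,F), while the old
     (A,B,E,F) left in *abef is, by construction, the new (C,D,G,H),
     so the two registers trade roles until the next sha256rnds2. */
  *cdgh = _mm_sha256rnds2_epu32 (*cdgh, *abef, wk);

  /* Rounds r+2, r+3 consume the high two W+K words. */
  wk = _mm_shuffle_epi32 (wk, 0x0e);
  *abef = _mm_sha256rnds2_epu32 (*abef, *cdgh, wk);

  /* Message schedule: finish W[r+4..r+7] and start the next group. */
  tmp = _mm_alignr_epi8 (*m0, *m3, 4);          /* W[r-3..r] */
  *m1 = _mm_add_epi32 (*m1, tmp);
  *m1 = _mm_sha256msg2_epu32 (*m1, *m0);
  *m3 = _mm_sha256msg1_epu32 (*m3, *m0);
}

As in the assembly, each sha256rnds2 leaves the updated (A,B,E,F) in the register that previously held (C,D,G,H), so issuing it twice with the operands swapped advances the state by four rounds while keeping the names meaningful. A caller of qround_sketch would rotate the three message vectors between calls, just as the assembly rotates MSG0..MSG3 through the ten QROUND invocations.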