C x86_64/sha_ni/sha256-compress.asm
ifelse(<
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
>)
.file "sha256-compress.asm"
C Pointer arguments, in the order of the C prototype (state, input, k).
define(<STATE>, <%rdi>)
define(<INPUT>, <%rsi>)
define(<K>, <%rdx>)
C Sum of message words and round constants, consumed by sha256rnds2.
define(<MSGK>,<%xmm0>) C Implicit operand of sha256rnds2
C Message schedule: four registers holding four 32-bit words each.
define(<MSG0>,<%xmm1>)
define(<MSG1>,<%xmm2>)
define(<MSG2>,<%xmm3>)
define(<MSG3>,<%xmm4>)
C Working state, kept as the two halves that sha256rnds2 operates on.
define(<ABEF>,<%xmm5>)
define(<CDGH>,<%xmm6>)
C Copy of the state on entry, added back after the last round.
define(<ABEF_ORIG>,<%xmm7>)
define(<CDGH_ORIG>, <%xmm8>)
C pshufb control used to byte swap the input words.
define(<SWAP_MASK>,<%xmm9>)
C Scratch register. It is the same %xmm9 as SWAP_MASK, so writing either
C name destroys the value held under the other.
define(<TMP>, <%xmm9>) C Overlaps SWAP_MASK
C QROUND(M0, M1, M2, M3, R)
C Runs four rounds starting at round R, then advances the message schedule.
C   M0: the four message words for these rounds. They are added to the
C       constants loaded from byte offset 4*R of K.
C   M1: updated in place, first by adding the palignr result and then by
C       sha256msg2 with M0.
C   M2: not referenced by the macro body.
C   M3: provides the low half of the palignr input, and is afterwards
C       updated in place by sha256msg1 with M0.
C   R:  number of the first round, a plain integer that eval can scale.
C Each sha256rnds2 does two rounds using the low two words of MSGK. The
C pshufd in between moves the high two words of MSGK down for the second
C pair. The two sha256rnds2 use swapped operands, so ABEF and CDGH hold
C the halves their names say again once all four rounds are done.
C Clobbers MSGK and TMP. As TMP overlaps SWAP_MASK, the byte swap mask is
C no longer available after the first expansion.
define(<QROUND>, <
movdqa eval($5*4)(K), MSGK
paddd $1, MSGK
sha256rnds2 ABEF, CDGH
pshufd <$>0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF
movdqa $1, TMP
palignr <$>4, $4, TMP
paddd TMP, $2
sha256msg2 $1, $2
sha256msg1 $1, $4
>)
C FIXME: Do something more clever, taking the pshufd into account.
C TRANSPOSE(ABCD, EFGH, scratch) --> untouched, ABEF, CDGH
C Regroups two registers by 64-bit halves. The second argument is first
C copied to scratch, since both unpack instructions overwrite their
C destination. After that
C   second argument = its own high half (low), high half of the first (high)
C   scratch = low half of the old second argument (low), low half of the
C             first (high)
C The first argument is only read. All three registers must be distinct.
define(<TRANSPOSE>, <
movdqa $2, $3
punpckhqdq $1, $2
punpcklqdq $1, $3
>)
C void
C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
C
C Runs the 64 rounds of the SHA-256 compression function over one block,
C using the SHA extension instructions.
C   state: eight 32-bit words, read on entry and overwritten with the
C          updated state. Accessed with movups, so no alignment is needed.
C   input: 64 bytes, read as four 16-byte loads with movups. Each 32-bit
C          word is byte swapped before use.
C   k:     round constants, read as sixteen 16-byte loads at offsets 0-240.
C NOTE(review): the loads from k use movdqa, which faults unless the
C address is 16-byte aligned. The uint32_t pointer type alone does not
C promise that. Confirm the alignment where the table is defined.
C Uses %xmm0-%xmm9. No general purpose register is written here.
.text
ALIGN(16)
C pshufb control that reverses the byte order within each 32-bit word.
.Lswap_mask:
.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
PROLOGUE(_nettle_sha256_compress)
C W64_ENTRY and W64_EXIT are defined outside this file. Presumably they
C adapt the Windows calling convention for 3 arguments and 10 xmm
C registers. TODO confirm against the macro definition.
W64_ENTRY(3, 10)
C Load the state words a-d into TMP and e-h into the ABEF register.
movups (STATE), TMP
movups 16(STATE), ABEF
C Reverse the order of the four words in each register.
pshufd $0x1b, TMP, TMP
pshufd $0x1b, ABEF, ABEF
C Regroup into the (a,b,e,f) and (c,d,g,h) halves used by sha256rnds2.
TRANSPOSE(TMP, ABEF, CDGH)
C TMP is dead from here on, so its register can take the swap mask.
movdqa .Lswap_mask(%rip), SWAP_MASK
C Save the incoming state for the final addition.
movdqa ABEF, ABEF_ORIG
movdqa CDGH, CDGH_ORIG
C Message words 0-3.
movups (INPUT), MSG0
pshufb SWAP_MASK, MSG0
movdqa (K), MSGK
paddd MSG0, MSGK
C sha256rnds2 leaves the new a,b,e,f in its destination. The unmodified
C source then equals the new c,d,g,h, which is why the operands alternate.
sha256rnds2 ABEF, CDGH C Round 0-1
C Move the upper two words of MSGK down for the next two rounds.
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 2-3
C Message words 4-7.
movups 16(INPUT), MSG1
pshufb SWAP_MASK, MSG1
movdqa 16(K), MSGK
paddd MSG1, MSGK
sha256rnds2 ABEF, CDGH C Round 4-5
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 6-7
C Begin expanding MSG0 towards words 16-19. The first QROUND finishes it.
sha256msg1 MSG1, MSG0
C Message words 8-11.
movups 32(INPUT), MSG2
pshufb SWAP_MASK, MSG2
movdqa 32(K), MSGK
paddd MSG2, MSGK
sha256rnds2 ABEF, CDGH C Round 8-9
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 10-11
C Begin expanding MSG1 towards words 20-23.
sha256msg1 MSG2, MSG1
C Message words 12-15. This is the last use of SWAP_MASK, as QROUND
C overwrites the shared register through TMP.
movups 48(INPUT), MSG3
pshufb SWAP_MASK, MSG3
QROUND(MSG3, MSG0, MSG1, MSG2, 12) C Round 12-15
C Rounds 16-51, four per line, with the message registers rotating roles.
QROUND(MSG0, MSG1, MSG2, MSG3, 16)
QROUND(MSG1, MSG2, MSG3, MSG0, 20)
QROUND(MSG2, MSG3, MSG0, MSG1, 24)
QROUND(MSG3, MSG0, MSG1, MSG2, 28)
QROUND(MSG0, MSG1, MSG2, MSG3, 32)
QROUND(MSG1, MSG2, MSG3, MSG0, 36)
QROUND(MSG2, MSG3, MSG0, MSG1, 40)
QROUND(MSG3, MSG0, MSG1, MSG2, 44)
QROUND(MSG0, MSG1, MSG2, MSG3, 48)
C The last twelve rounds are written out, as they need less and less of
C the message expansion. No sha256msg1 is started from here on.
movdqa 208(K), MSGK
paddd MSG1, MSGK
sha256rnds2 ABEF, CDGH C Round 52-53
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 54-55
C Finish message words 56-59 in MSG2.
movdqa MSG1, TMP
palignr $4, MSG0, TMP
paddd TMP, MSG2
sha256msg2 MSG1, MSG2
movdqa 224(K), MSGK
paddd MSG2, MSGK
sha256rnds2 ABEF, CDGH C Round 56-57
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 58-59
C Finish message words 60-63 in MSG3.
movdqa MSG2, TMP
palignr $4, MSG1, TMP
paddd TMP, MSG3
sha256msg2 MSG2, MSG3
movdqa 240(K), MSGK
paddd MSG3, MSGK
sha256rnds2 ABEF, CDGH C Round 60-61
pshufd $0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF C Round 62-63
C Add the state saved on entry.
paddd ABEF_ORIG, ABEF
paddd CDGH_ORIG, CDGH
C Undo the initial regrouping. The words a-d end up in the CDGH register
C and e-h in TMP, both in reversed order until the pshufd below.
TRANSPOSE(ABEF, CDGH, TMP)
pshufd $0x1b, CDGH, CDGH
pshufd $0x1b, TMP, TMP
C Store the updated state, words a-d first.
movups CDGH, 0(STATE)
movups TMP, 16(STATE)
W64_EXIT(3, 10)
ret
EPILOGUE(_nettle_sha256_compress)
|