1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
C x86/camellia-crypt-internal.asm
ifelse(`
Copyright (C) 2010, Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
')
C Register usage:
C Camellia state, 128-bit value in little endian order.
C L0, H0 correspond to D1 in the spec and i0 in the C implementation,
C while L1, H1 correspond to D2/i1.
define(`L0',`%eax')
define(`H0',`%ebx')
define(`L1',`%ecx')
define(`H1',`%edx')
C TMP is the scratch register: it holds table indices in ROUND, the
C intermediate value in FL and FLINV, and the src/dst pointers in the
C function body.
define(`TMP',`%ebp')
C KEY points into the subkey array; the round macros address subkeys
C as byte offsets from it.
define(`KEY',`%esi')
C T holds the base address of the lookup tables (see SP1110 etc. below).
define(`T',`%edi')
C Locals on the stack, in the 20 bytes reserved by the function prologue.
C FRAME_L0 .. FRAME_H1 hold saved copies of the state words, written by
C ROUND6 and read back by ROUND. FRAME_CNT is the round loop counter.
define(`FRAME_L0', `(%esp)')
define(`FRAME_H0', `4(%esp)')
define(`FRAME_L1', `8(%esp)')
define(`FRAME_H1', `12(%esp)')
define(`FRAME_CNT', `16(%esp)')
C Arguments on stack.
C The first argument is at 40(%esp): the 20 bytes of locals, 16 bytes of
C saved registers and the 4-byte return address lie below it.
define(`FRAME_NKEYS', `40(%esp)')
define(`FRAME_KEYS', `44(%esp)')
define(`FRAME_TABLE', `48(%esp)')
define(`FRAME_LENGTH', `52(%esp)')
define(`FRAME_DST', `56(%esp)')
define(`FRAME_SRC', `60(%esp)')
C Lookup table operands. The argument is the register holding the index,
C which is scaled by 4 (32-bit entries). The four tables are addressed at
C 1024-byte intervals from T.
define(`SP1110', `(T,$1,4)')
define(`SP0222', `1024(T,$1,4)')
define(`SP3033', `2048(T,$1,4)')
define(`SP4404', `3072(T,$1,4)')
C ROUND(xl, xh, yl, yh, key-offset)
C One round. Eight table lookups, indexed by bytes of xl and xh, are
C xored together into two accumulators held in yh and yl. The 64-bit
C subkey at key-offset(KEY) is xored in, the accumulators are mixed, and
C the result is xored with the copy of y saved in FRAME_yl and FRAME_yh.
C xl and xh are rotated 16 bits at the end
C yl and yh are read from stack, and left in registers
C The previous register contents of yl and yh are overwritten, so the
C caller must have stored y to its stack slots beforehand.
C yl and yh must be passed quoted, as unexpanded names, because FRAME_
C is prefixed to them to form the name of the stack slot macro.
C TMP is clobbered.
C LREG and HREG are defined outside this file; they are assumed to name
C the lowest byte and the second lowest byte of a register, so that the
C 16-bit rotates expose the other two bytes -- TODO confirm.
define(`ROUND', `
C Start the yh accumulator from the LREG and HREG bytes of xl
movzbl LREG($1), TMP
movl SP1110(TMP), $4
movzbl HREG($1), TMP
xorl SP4404(TMP), $4
C Rotate so that LREG and HREG address the other half of xl
roll `$'16, $1
C Start the yl accumulator from the LREG and HREG bytes of xh
movzbl LREG($2), TMP
movl SP4404(TMP), $3
movzbl HREG($2), TMP
xorl SP3033(TMP), $3
C Rotate so that LREG and HREG address the other half of xh
roll `$'16, $2
C Remaining two bytes of xl go into yh
movzbl LREG($1), TMP
xorl SP3033(TMP), $4
movzbl HREG($1), TMP
xorl SP0222(TMP), $4
C Remaining two bytes of xh go into yl
movzbl LREG($2), TMP
xorl SP0222(TMP), $3
movzbl HREG($2), TMP
xorl SP1110(TMP), $3
C Add the subkey: word at key-offset into yh, word at key-offset + 4 into yl
xorl $5(KEY), $4
xorl $5 + 4(KEY), $3
C Mix the two accumulators
xorl $3, $4
rorl `$'8, $3
xorl $4, $3
C Combine with the saved value of y from the stack
xorl FRAME_$3, $3
xorl FRAME_$4, $4
')
C Six rounds, with inputs and outputs in registers.
C Uses the six subkeys at offsets 0, 8, ..., 40 from KEY; KEY itself is
C not modified. The rounds alternate which half of the state is the
C input x and which is the updated half y.
C Since ROUND reads y from the stack, each freshly computed half is
C stored to its FRAME slots before the round in which it is next
C needed as y. FRAME_L0 .. FRAME_H1 and TMP are clobbered.
define(`ROUND6', `
C Save the whole state; ROUND reads its y operand from these slots
movl L0, FRAME_L0
movl H0, FRAME_H0
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L0,H0,`L1',`H1',0)
C Save the new L1, H1: they are y again two rounds later
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L1,H1,`L0',`H0',8)
movl L0, FRAME_L0
movl H0, FRAME_H0
ROUND(L0,H0,`L1',`H1',16)
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L1,H1,`L0',`H0',24)
movl L0, FRAME_L0
movl H0, FRAME_H0
ROUND(L0,H0,`L1',`H1',32)
C No save needed here: L1, H1 are not used as y again in this group
ROUND(L1,H1,`L0',`H0',40)
C The last ROUND left L1 and H1 rotated by 16 bits; rotate them back
roll `$'16, L1
roll `$'16, H1
')
C FL(x0, x1, key-offset)
C Updates the register pair x0, x1 in place, using the two 32-bit key
C words at key-offset(KEY) and key-offset + 4(KEY):
C   x0 ^= rotate_left(x1 & key[offset + 4], 1)
C   x1 ^= x0 | key[offset]      (using the updated x0)
C TMP is clobbered.
define(`FL', `
C x0 ^= rotl(x1 AND key word at offset + 4, 1)
movl $3 + 4(KEY), TMP
andl $2, TMP
roll `$'1, TMP
xorl TMP, $1
C x1 ^= (updated x0 OR key word at offset)
movl $3(KEY), TMP
orl $1, TMP
xorl TMP, $2
')
C FLINV(x0, x1, key-offset)
C Performs the same two steps as FL, in the opposite order, on the
C register pair x0, x1 with the key words at key-offset(KEY) and
C key-offset + 4(KEY):
C   x1 ^= x0 | key[offset]
C   x0 ^= rotate_left(x1 & key[offset + 4], 1)      (using the updated x1)
C TMP is clobbered.
define(`FLINV', `
C x1 ^= (x0 OR key word at offset)
movl $3(KEY), TMP
orl $1, TMP
xorl TMP, $2
C x0 ^= rotl(updated x1 AND key word at offset + 4, 1)
movl $3 + 4(KEY), TMP
andl $2, TMP
roll `$'1, TMP
xorl TMP, $1
')
.file "camellia-crypt-internal.asm"
C _camellia_crypt(unsigned nkeys, const uint64_t *keys,
C const struct camellia_table *T,
C size_t length, uint8_t *dst,
C uint8_t *src)
C
C Transforms the data at src into dst, one 16-byte block at a time,
C using the nkeys 64-bit subkeys at keys and the lookup tables at T.
C All arguments are read from the stack through the FRAME_ macros.
C Returns immediately if length is zero. %ebx, %ebp, %esi and %edi are
C saved and restored; %eax, %ecx and %edx are overwritten.
C The FRAME_SRC, FRAME_DST and FRAME_LENGTH argument slots are updated
C in place as the blocks are processed.
C NOTE(review): length is presumably required to be a multiple of 16.
C The block loop exits when the subtraction reaches zero or borrows, so
C a trailing partial block would be read and written as a full 16 bytes.
.text
ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
C save all registers that need to be saved
pushl %ebx C 32(%esp)
pushl %ebp C 28(%esp)
pushl %esi C 24(%esp)
pushl %edi C 20(%esp)
C Reserve the five local slots FRAME_L0 .. FRAME_CNT
subl $20, %esp
C Nothing to do for a zero length
movl FRAME_LENGTH, %ebp
testl %ebp,%ebp
jz .Lend
.Lblock_loop:
C Load data, note that we'll happily do unaligned loads
C Each 32-bit word is byte-swapped after loading. The four words of the
C block go to H0, L0, H1, L1 in that order.
movl FRAME_SRC, TMP
movl (TMP), H0
bswap H0
movl 4(TMP), L0
bswap L0
movl 8(TMP), H1
bswap H1
movl 12(TMP), L1
bswap L1
C Advance the source pointer for the next block
addl $16, FRAME_SRC
C Restart at the first subkey for every block
movl FRAME_KEYS, KEY
C Round loop counter starts at nkeys - 8 and drops by 8 per iteration
movl FRAME_NKEYS, TMP
subl $8, TMP
movl TMP, FRAME_CNT
C Xor the first subkey into L0, H0, then step KEY past it
xorl (KEY), L0
xorl 4(KEY), H0
addl $8, KEY
movl FRAME_TABLE, T
C First group of six rounds, using the six subkeys starting at KEY
ROUND6
.Lround_loop:
C Step KEY forward by eight subkeys. The two just below the new KEY are
C used by FL and FLINV, the six starting at KEY by the next ROUND6.
addl $64, KEY
FL(L0, H0, -16)
FLINV(L1, H1, -8)
ROUND6
C Loop while the counter remains above zero after the subtraction
subl $8, FRAME_CNT
ja .Lround_loop
C Store the result, byte-swapping each word back. The halves are
C swapped relative to the load order: H1, L1 go to the first eight
C bytes and H0, L0 to the last eight.
movl FRAME_DST, TMP
bswap H0
movl H0,8(TMP)
bswap L0
movl L0,12(TMP)
C Xor the final subkey, at offset 48 from KEY, into L1, H1 before storing
xorl 52(KEY), H1
bswap H1
movl H1, 0(TMP)
xorl 48(KEY), L1
bswap L1
movl L1, 4(TMP)
C Advance the destination pointer and count down the remaining length
addl $16, FRAME_DST
subl $16, FRAME_LENGTH
ja .Lblock_loop
.Lend:
C Release the locals and restore the saved registers in reverse order
addl $20, %esp
popl %edi
popl %esi
popl %ebp
popl %ebx
ret
EPILOGUE(_nettle_camellia_crypt)
|