1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2010, Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
C MA 02111-1307, USA.
C Register usage:
C Camellia state, 128-bit value in little endian order.
C L0, H0 correspond to D1 in the spec and i0 in the C implementation,
C while L1, H1 correspond to D2/i1.
C The four 32-bit words of the state stay in registers for the whole
C block computation. ROUND applies LREG and HREG to these registers;
C those macros are defined elsewhere and presumably select the low and
C second byte sub-registers, which would require the state to live in
C eax, ebx, ecx and edx -- confirm against the macro definitions.
define(<L0>,<%eax>)
define(<H0>,<%ebx>)
define(<L1>,<%ecx>)
define(<H1>,<%edx>)
C TMP is scratch: table index in ROUND, intermediate value in FL and
C FLINV, and source/destination pointer while loading and storing a block.
define(<TMP>,<%ebp>)
C KEY points into the subkey area of the context argument.
define(<KEY>,<%esi>)
C T holds the base address of the lookup tables (the table argument).
define(<T>,<%edi>)
C Locals on the stack: the 20 bytes reserved by the subl in the prologue.
C FRAME_L0 .. FRAME_H1 hold saved copies of the state words, which ROUND
C xors into its result. FRAME_CNT counts the subkeys still to be consumed
C by the round loop.
define(<FRAME_L0>, <(%esp)>)
define(<FRAME_H0>, <4(%esp)>)
define(<FRAME_L1>, <8(%esp)>)
define(<FRAME_H1>, <12(%esp)>)
define(<FRAME_CNT>, <16(%esp)>)
C Arguments on stack.
C Offsets are relative to the stack pointer after the prologue: 20 bytes
C of locals, the four saved registers at 20 .. 32, and the return address
C at 36, which places the first argument at 40.
define(<FRAME_CTX>, <40(%esp)>)
define(<FRAME_TABLE>, <44(%esp)>)
define(<FRAME_LENGTH>, <48(%esp)>)
define(<FRAME_DST>, <52(%esp)>)
define(<FRAME_SRC>, <56(%esp)>)
C Addressing macros for the four lookup tables. Each takes an index
C register as argument and expands to a memory operand based at T, with
C the index scaled by 4. The tables are 1024 bytes apart, which matches
C 256 four-byte entries per table (the index is always a zero-extended
C byte in this file).
C NOTE(review): the names presumably mirror the sp1110, sp0222, sp3033
C and sp4404 members of struct camellia_table -- confirm against the C
C header.
define(<SP1110>, <(T,$1,4)>)
define(<SP0222>, <1024(T,$1,4)>)
define(<SP3033>, <2048(T,$1,4)>)
define(<SP4404>, <3072(T,$1,4)>)
C ROUND(xl, xh, yl, yh, key-offset)
C One Feistel round: the 64-bit half x, held in registers xl (low word)
C and xh (high word), is pushed through the table lookups, combined with
C a subkey, and xored with the previous value of the other half y.
C
C xl and xh are rotated 16 bits at the end
C yl and yh are read from stack, and left in registers
C
C In more detail:
C   xl, xh      input registers. Each is rotated left by 16 once, so on
C               exit they hold the input value rotated by 16 bits.
C   yl, yh      names of the result registers. Their incoming register
C               contents are overwritten without being read; the old y
C               value is taken from the frame slots FRAME_yl and FRAME_yh,
C               which the caller must have filled. The names are pasted
C               onto FRAME_, so they must be passed quoted.
C   key-offset  byte offset from KEY of the 64-bit subkey, low word first.
C Clobbers TMP and the flags.
C LREG and HREG are defined outside this file; presumably they name the
C low byte and the second byte of a 32-bit register -- confirm.
define(<ROUND>, <
C yh collects the table lookups indexed by the four bytes of xl.
movzbl LREG($1), TMP
movl SP1110(TMP), $4
movzbl HREG($1), TMP
xorl SP4404(TMP), $4
C Rotate xl so that its other two bytes can be picked up the same way.
roll <$>16, $1
C yl collects the table lookups indexed by the four bytes of xh.
movzbl LREG($2), TMP
movl SP4404(TMP), $3
movzbl HREG($2), TMP
xorl SP3033(TMP), $3
roll <$>16, $2
C Remaining two bytes of xl.
movzbl LREG($1), TMP
xorl SP3033(TMP), $4
movzbl HREG($1), TMP
xorl SP0222(TMP), $4
C Remaining two bytes of xh.
movzbl LREG($2), TMP
xorl SP0222(TMP), $3
movzbl HREG($2), TMP
xorl SP1110(TMP), $3
C Mix the two words: yh ^= yl, then yl = ror(yl, 8) ^ yh.
xorl $3, $4
rorl <$>8, $3
xorl $4, $3
C Add the subkey: low word into yl, high word into yh.
xorl $5(KEY), $3
xorl $5 + 4(KEY), $4
C Finally xor in the previous y value that was saved in the frame.
xorl FRAME_$3, $3
xorl FRAME_$4, $4
>)
C Six rounds, with inputs and outputs in registers.
C Consumes the six 64-bit subkeys at byte offsets 0 .. 47 from KEY and
C leaves KEY unchanged. The two state halves alternate roles: L0,H0 is the
C x input of rounds 1, 3 and 5, while L1,H1 is the x input of rounds 2, 4
C and 6. Uses the four FRAME_ state slots and clobbers TMP and the flags
C (through ROUND).
define(<ROUND6>, <
C Seed all four frame slots: ROUND takes the old y value from the frame,
C not from the registers.
movl L0, FRAME_L0
movl H0, FRAME_H0
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L0,H0,<L1>,<H1>,0)
C Save each fresh result before the next round uses it as x and rotates
C the registers; the round after that xors in the unrotated frame copy.
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L1,H1,<L0>,<H0>,8)
movl L0, FRAME_L0
movl H0, FRAME_H0
ROUND(L0,H0,<L1>,<H1>,16)
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L1,H1,<L0>,<H0>,24)
movl L0, FRAME_L0
movl H0, FRAME_H0
ROUND(L0,H0,<L1>,<H1>,32)
C No store here: the last round reads only the L0 and H0 frame slots.
ROUND(L1,H1,<L0>,<H0>,40)
C The last round left L1 and H1 rotated by 16 bits; rotate them again by
C 16 so that all four words leave in their normal position.
roll <$>16, L1
roll <$>16, H1
>)
C FL(x0, x1, key-offset)
C Applies, in place, to the 64-bit value held in registers x0 (low word)
C and x1 (high word), using the 64-bit subkey at key-offset from KEY
C (low word at key-offset, high word at key-offset + 4):
C   x0 ^= rotl(x1 & key_high, 1)
C   x1 ^= (x0 | key_low)        (using the updated x0)
C Clobbers TMP and the flags.
define(<FL>, <
movl $3 + 4(KEY), TMP
andl $2, TMP
roll <$>1, TMP
xorl TMP, $1
movl $3(KEY), TMP
orl $1, TMP
xorl TMP, $2
>)
C FLINV(x0, x1, key-offset)
C Same two steps as FL but in the opposite order, applied in place to the
C 64-bit value in registers x0 (low word) and x1 (high word), using the
C 64-bit subkey at key-offset from KEY (low word first):
C   x1 ^= (x0 | key_low)
C   x0 ^= rotl(x1 & key_high, 1)   (using the updated x1)
C Clobbers TMP and the flags.
define(<FLINV>, <
movl $3(KEY), TMP
orl $1, TMP
xorl TMP, $2
movl $3 + 4(KEY), TMP
andl $2, TMP
roll <$>1, TMP
xorl TMP, $1
>)
C NOTE(review): the name below says "encrypt", while the symbol defined
C here is _nettle_camellia_crypt; possibly a copy-paste leftover --
C confirm against the actual file name.
.file "camellia-encrypt-internal.asm"
C _camellia_crypt(struct camellia_context *ctx,
C const struct camellia_table *T,
C unsigned length, uint8_t *dst,
C uint8_t *src)
C
C Processes length bytes from src to dst in 16-byte blocks, using the
C subkeys found in ctx and the lookup tables in T. All five arguments are
C read from the stack. Nothing is done when length is zero. The block loop
C always handles a whole 16-byte block and stops once the remaining count
C reaches zero or wraps below it, so length is expected to be a multiple
C of 16.
C
C Layout of ctx as used here: a 32-bit subkey count at offset 0, followed
C from offset 4 by the 64-bit subkeys, each stored low word first.
C
C Saves and restores ebx, ebp, esi and edi; eax, ecx and edx are
C overwritten. No value is placed in eax for the caller.
.text
ALIGN(4)
PROLOGUE(_nettle_camellia_crypt)
C save all registers that need to be saved
pushl %ebx C 32(%esp)
pushl %ebp C 28(%esp)
pushl %esi C 24(%esp)
pushl %edi C 20(%esp)
C Room for the five 32-bit locals FRAME_L0 .. FRAME_CNT.
subl $20, %esp
C Nothing to do for an empty input.
movl FRAME_LENGTH, %ebp
testl %ebp,%ebp
jz .Lend
.Lblock_loop:
C Load data, note that we'll happily do unaligned loads
C Every 32-bit word is byte-swapped after loading. Input bytes 0-3 end up
C in H0, 4-7 in L0, 8-11 in H1 and 12-15 in L1.
movl FRAME_SRC, TMP
movl (TMP), H0
bswap H0
movl 4(TMP), L0
bswap L0
movl 8(TMP), H1
bswap H1
movl 12(TMP), L1
bswap L1
C Advance the source pointer to the next block.
addl $16, FRAME_SRC
C Round counter = subkey count - 8. The eight subkeys not counted are the
C initial whitening key, the six keys of the first ROUND6 and the final
C whitening key; each pass of the round loop consumes eight more.
movl FRAME_CTX, KEY
movl (KEY), TMP
subl $8, TMP
mov TMP, FRAME_CNT
C Whitening using first subkey
xor 4(KEY), L0
xor 8(KEY), H0
C Skip the count word and the first subkey; KEY now points at the second
C subkey.
add $12, KEY
movl FRAME_TABLE, T
ROUND6
.Lround_loop:
C Step over eight subkeys. FL and FLINV use the two subkeys just below
C the new KEY (offsets -16 and -8); ROUND6 uses offsets 0 .. 47.
add $64, KEY
FL(L0, H0, -16)
FLINV(L1, H1, -8)
ROUND6
C Repeat while subkeys remain (unsigned compare on the updated counter).
sub $8, FRAME_CNT
ja .Lround_loop
C Store the result with the two halves swapped: L1,H1 go to output bytes
C 0-7 and L0,H0 to bytes 8-15, each word byte-swapped back. L1,H1 are
C first xored with the subkey that follows the last six round keys
C (offsets 48 and 52 from KEY).
movl FRAME_DST, TMP
bswap H0
movl H0,8(TMP)
bswap L0
movl L0,12(TMP)
xorl 52(KEY), H1
bswap H1
movl H1, 0(TMP)
xorl 48(KEY), L1
bswap L1
movl L1, 4(TMP)
C Advance the destination pointer and loop while more input remains.
addl $16, FRAME_DST
subl $16, FRAME_LENGTH
ja .Lblock_loop
.Lend:
C Release the locals and restore the saved registers in reverse order.
addl $20, %esp
popl %edi
popl %esi
popl %ebp
popl %ebx
ret
EPILOGUE(_nettle_camellia_crypt)
|