1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
C nettle, low-level cryptographics library
C
C Copyright (C) 2011 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
C MA 02111-1307, USA.
C WROL(count, w)
define(<WROL>, <
movdqa $2, T0
pslld <$>$1, $2
psrld <$>eval(32 - $1), T0
por T0, $2
>)
C Note: Diagrams use little-endian representation, with least
C significant word to the left.
C Transpose values from:
C +----+----+----+----+
C x0: | a0 | a1 | a2 | a3 |
C x1: | b0 | b1 | b2 | b3 |
C x2: | c0 | c1 | c2 | c3 |
C x3: | d0 | d1 | d2 | d3 |
C +----+----+----+----+
C To:
C +----+----+----+----+
C x0: | a0 | b0 | c0 | d0 |
C x1: | a1 | b1 | c1 | d1 |
C x2: | a2 | b2 | c2 | d2 |
C x3: | a3 | b3 | c3 | d3 |
C +----+----+----+----+
define(<WTRANSPOSE>, <
movdqa $1, T0
punpcklqdq $3, T0 C |a0 a1 c0 c1|
punpckhqdq $3, $1 C |a2 a3 c2 c3|
pshufd <$>0xd8, T0, T0 C |a0 c0 a1 c1|
pshufd <$>0xd8, $1, T1 C |a2 c2 a3 c3|
movdqa $2, T2
punpcklqdq $4, T2 C |b0 b1 d0 11|
punpckhqdq $4, $2 C |b2 b3 d2 d3|
pshufd <$>0xd8, T2, T2 C |b0 d0 b1 d1|
pshufd <$>0xd8, $2, T3 C |b2 d2 b3 d3|
movdqa T0, $1
punpckldq T2, $1 C |a0 b0 c0 d0|
movdqa T0, $2
punpckhdq T2, $2 C |a1 b1 c1 d1|
movdqa T1, $3
punpckldq T3, $3 C |a2 b2 c2 d2|
movdqa T1, $4
punpckhdq T3, $4 C |a3 b3 c3 d3|
>)
C FIXME: Arrange 16-byte alignment, so we can use movaps?
define(<WKEYXOR>, <
movups $1(CTX, CNT), T0
pshufd <$>0x55, T0, T1
pshufd <$>0xaa, T0, T2
pxor T1, $3
pxor T2, $4
pshufd <$>0xff, T0, T1
pshufd <$>0x00, T0, T0
pxor T1, $5
pxor T0, $2
>)
|