summary | refs | log | tree | commit | diff
path: root/Modules/_sha3/keccak
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_sha3/keccak')
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros 555
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-32-s1.macros 1187
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-32-s2.macros 1187
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-32.macros 26
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-64.macros 728
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-int-set.h 6
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-interface.h 46
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h 6
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-opt32.c 524
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h 9
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-opt64.c 510
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-simd128.macros 651
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-simd64.macros 517
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-unrolling.macros 124
-rw-r--r-- Modules/_sha3/keccak/KeccakF-1600-xop.macros 573
-rw-r--r-- Modules/_sha3/keccak/KeccakNISTInterface.c 83
-rw-r--r-- Modules/_sha3/keccak/KeccakNISTInterface.h 72
-rw-r--r-- Modules/_sha3/keccak/KeccakSponge.c 266
-rw-r--r-- Modules/_sha3/keccak/KeccakSponge.h 76
-rwxr-xr-x Modules/_sha3/keccak/brg_endian.h 142
-rw-r--r-- Modules/_sha3/keccak/crypto_hash.h 0
21 files changed, 7288 insertions, 0 deletions
diff --git a/Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros b/Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros
new file mode 100644
index 0000000000..c0c9029873
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros
@@ -0,0 +1,555 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by Ronny Van Keer,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+static const UINT32 KeccakF1600RoundConstants_int2[2*24] = /* 48 words, read strictly in order through pRoundConstants by the rounds macro */
+{
+ 0x00000001UL, 0x00000000UL, /* each row: word XORed into the ...ba0 result, then word XORed into the ...ba1 result */
+ 0x00000000UL, 0x00000089UL,
+ 0x00000000UL, 0x8000008bUL,
+ 0x00000000UL, 0x80008080UL,
+ 0x00000001UL, 0x0000008bUL,
+ 0x00000001UL, 0x00008000UL,
+ 0x00000001UL, 0x80008088UL,
+ 0x00000001UL, 0x80000082UL,
+ 0x00000000UL, 0x0000000bUL,
+ 0x00000000UL, 0x0000000aUL,
+ 0x00000001UL, 0x00008082UL,
+ 0x00000000UL, 0x00008003UL,
+ 0x00000001UL, 0x0000808bUL,
+ 0x00000001UL, 0x8000000bUL,
+ 0x00000001UL, 0x8000008aUL,
+ 0x00000001UL, 0x80000081UL,
+ 0x00000000UL, 0x80000081UL,
+ 0x00000000UL, 0x80000008UL,
+ 0x00000000UL, 0x00000083UL,
+ 0x00000000UL, 0x80008003UL,
+ 0x00000001UL, 0x80008088UL,
+ 0x00000000UL, 0x80000088UL,
+ 0x00000001UL, 0x00008000UL,
+ 0x00000000UL, 0x80008082UL
+};
+
+#undef rounds
+/* rounds: expands to a brace-enclosed block that loads state[0..49] into locals, runs 12 loop passes over them, and stores them back into state. UINT32, ROL32 and state must be supplied by the including code. */
+#define rounds \
+{ \
+ UINT32 Da0, De0, Di0, Do0, Du0; \
+ UINT32 Da1, De1, Di1, Do1, Du1; \
+ UINT32 Ba, Be, Bi, Bo, Bu; \
+ UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \
+ UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \
+ UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \
+ UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \
+ UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \
+ UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \
+ UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \
+ UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \
+ UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \
+ UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \
+ UINT32 Cw, Cx, Cy, Cz; \
+ UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \
+ UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \
+ UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \
+ UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \
+ UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \
+ UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \
+ UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \
+ UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \
+ UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \
+ UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \
+ const UINT32 * pRoundConstants = KeccakF1600RoundConstants_int2; \
+ UINT32 i; \
+/* Copy state[0..49] into the locals Aba0..Asu1. */ \
+ copyFromState(A, state) \
+/* 12 passes; each pass runs the step sequence twice (A words to E words, then E words back to A words) and reads four constant words. */ \
+ for( i = 12; i != 0; --i ) { \
+ Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
+ Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
+ Da0 = Cx^ROL32(Du1, 1); \
+ Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
+ Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
+ Da1 = Cz^Du0; \
+/* Cw..Cz are scratch sums; Du0/Du1 temporarily hold the e-suffix sums and receive their final values further down. */ \
+ Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
+ Do0 = Cw^ROL32(Cz, 1); \
+ Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
+ Do1 = Cy^Cx; \
+\
+ Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
+ De0 = Cx^ROL32(Cy, 1); \
+ Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
+ De1 = Cz^Cw; \
+\
+ Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
+ Di0 = Du0^ROL32(Cy, 1); \
+ Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
+ Di1 = Du1^Cw; \
+\
+ Du0 = Cw^ROL32(Cz, 1); \
+ Du1 = Cy^Cx; \
+/* A to E: each group XORs a D word into five A words, moves them (rotated where a rotation applies) into Ba..Bu, and combines those into five E words. */ \
+ Aba0 ^= Da0; \
+ Ba = Aba0; \
+ Age0 ^= De0; \
+ Be = ROL32(Age0, 22); \
+ Aki1 ^= Di1; \
+ Bi = ROL32(Aki1, 22); \
+ Amo1 ^= Do1; \
+ Bo = ROL32(Amo1, 11); \
+ Asu0 ^= Du0; \
+ Bu = ROL32(Asu0, 7); \
+ Eba0 = Ba ^((~Be)& Bi ) ^ *(pRoundConstants++); \
+ Ebe0 = Be ^((~Bi)& Bo ); \
+ Ebi0 = Bi ^((~Bo)& Bu ); \
+ Ebo0 = Bo ^((~Bu)& Ba ); \
+ Ebu0 = Bu ^((~Ba)& Be ); \
+\
+ Abo0 ^= Do0; \
+ Ba = ROL32(Abo0, 14); \
+ Agu0 ^= Du0; \
+ Be = ROL32(Agu0, 10); \
+ Aka1 ^= Da1; \
+ Bi = ROL32(Aka1, 2); \
+ Ame1 ^= De1; \
+ Bo = ROL32(Ame1, 23); \
+ Asi1 ^= Di1; \
+ Bu = ROL32(Asi1, 31); \
+ Ega0 = Ba ^((~Be)& Bi ); \
+ Ege0 = Be ^((~Bi)& Bo ); \
+ Egi0 = Bi ^((~Bo)& Bu ); \
+ Ego0 = Bo ^((~Bu)& Ba ); \
+ Egu0 = Bu ^((~Ba)& Be ); \
+\
+ Abe1 ^= De1; \
+ Ba = ROL32(Abe1, 1); \
+ Agi0 ^= Di0; \
+ Be = ROL32(Agi0, 3); \
+ Ako1 ^= Do1; \
+ Bi = ROL32(Ako1, 13); \
+ Amu0 ^= Du0; \
+ Bo = ROL32(Amu0, 4); \
+ Asa0 ^= Da0; \
+ Bu = ROL32(Asa0, 9); \
+ Eka0 = Ba ^((~Be)& Bi ); \
+ Eke0 = Be ^((~Bi)& Bo ); \
+ Eki0 = Bi ^((~Bo)& Bu ); \
+ Eko0 = Bo ^((~Bu)& Ba ); \
+ Eku0 = Bu ^((~Ba)& Be ); \
+\
+ Abu1 ^= Du1; \
+ Ba = ROL32(Abu1, 14); \
+ Aga0 ^= Da0; \
+ Be = ROL32(Aga0, 18); \
+ Ake0 ^= De0; \
+ Bi = ROL32(Ake0, 5); \
+ Ami1 ^= Di1; \
+ Bo = ROL32(Ami1, 8); \
+ Aso0 ^= Do0; \
+ Bu = ROL32(Aso0, 28); \
+ Ema0 = Ba ^((~Be)& Bi ); \
+ Eme0 = Be ^((~Bi)& Bo ); \
+ Emi0 = Bi ^((~Bo)& Bu ); \
+ Emo0 = Bo ^((~Bu)& Ba ); \
+ Emu0 = Bu ^((~Ba)& Be ); \
+\
+ Abi0 ^= Di0; \
+ Ba = ROL32(Abi0, 31); \
+ Ago1 ^= Do1; \
+ Be = ROL32(Ago1, 28); \
+ Aku1 ^= Du1; \
+ Bi = ROL32(Aku1, 20); \
+ Ama1 ^= Da1; \
+ Bo = ROL32(Ama1, 21); \
+ Ase0 ^= De0; \
+ Bu = ROL32(Ase0, 1); \
+ Esa0 = Ba ^((~Be)& Bi ); \
+ Ese0 = Be ^((~Bi)& Bo ); \
+ Esi0 = Bi ^((~Bo)& Bu ); \
+ Eso0 = Bo ^((~Bu)& Ba ); \
+ Esu0 = Bu ^((~Ba)& Be ); \
+\
+ Aba1 ^= Da1; \
+ Ba = Aba1; \
+ Age1 ^= De1; \
+ Be = ROL32(Age1, 22); \
+ Aki0 ^= Di0; \
+ Bi = ROL32(Aki0, 21); \
+ Amo0 ^= Do0; \
+ Bo = ROL32(Amo0, 10); \
+ Asu1 ^= Du1; \
+ Bu = ROL32(Asu1, 7); \
+ Eba1 = Ba ^((~Be)& Bi ); \
+ Eba1 ^= *(pRoundConstants++); \
+ Ebe1 = Be ^((~Bi)& Bo ); \
+ Ebi1 = Bi ^((~Bo)& Bu ); \
+ Ebo1 = Bo ^((~Bu)& Ba ); \
+ Ebu1 = Bu ^((~Ba)& Be ); \
+\
+ Abo1 ^= Do1; \
+ Ba = ROL32(Abo1, 14); \
+ Agu1 ^= Du1; \
+ Be = ROL32(Agu1, 10); \
+ Aka0 ^= Da0; \
+ Bi = ROL32(Aka0, 1); \
+ Ame0 ^= De0; \
+ Bo = ROL32(Ame0, 22); \
+ Asi0 ^= Di0; \
+ Bu = ROL32(Asi0, 30); \
+ Ega1 = Ba ^((~Be)& Bi ); \
+ Ege1 = Be ^((~Bi)& Bo ); \
+ Egi1 = Bi ^((~Bo)& Bu ); \
+ Ego1 = Bo ^((~Bu)& Ba ); \
+ Egu1 = Bu ^((~Ba)& Be ); \
+\
+ Abe0 ^= De0; \
+ Ba = Abe0; \
+ Agi1 ^= Di1; \
+ Be = ROL32(Agi1, 3); \
+ Ako0 ^= Do0; \
+ Bi = ROL32(Ako0, 12); \
+ Amu1 ^= Du1; \
+ Bo = ROL32(Amu1, 4); \
+ Asa1 ^= Da1; \
+ Bu = ROL32(Asa1, 9); \
+ Eka1 = Ba ^((~Be)& Bi ); \
+ Eke1 = Be ^((~Bi)& Bo ); \
+ Eki1 = Bi ^((~Bo)& Bu ); \
+ Eko1 = Bo ^((~Bu)& Ba ); \
+ Eku1 = Bu ^((~Ba)& Be ); \
+\
+ Abu0 ^= Du0; \
+ Ba = ROL32(Abu0, 13); \
+ Aga1 ^= Da1; \
+ Be = ROL32(Aga1, 18); \
+ Ake1 ^= De1; \
+ Bi = ROL32(Ake1, 5); \
+ Ami0 ^= Di0; \
+ Bo = ROL32(Ami0, 7); \
+ Aso1 ^= Do1; \
+ Bu = ROL32(Aso1, 28); \
+ Ema1 = Ba ^((~Be)& Bi ); \
+ Eme1 = Be ^((~Bi)& Bo ); \
+ Emi1 = Bi ^((~Bo)& Bu ); \
+ Emo1 = Bo ^((~Bu)& Ba ); \
+ Emu1 = Bu ^((~Ba)& Be ); \
+\
+ Abi1 ^= Di1; \
+ Ba = ROL32(Abi1, 31); \
+ Ago0 ^= Do0; \
+ Be = ROL32(Ago0, 27); \
+ Aku0 ^= Du0; \
+ Bi = ROL32(Aku0, 19); \
+ Ama0 ^= Da0; \
+ Bo = ROL32(Ama0, 20); \
+ Ase1 ^= De1; \
+ Bu = ROL32(Ase1, 1); \
+ Esa1 = Ba ^((~Be)& Bi ); \
+ Ese1 = Be ^((~Bi)& Bo ); \
+ Esi1 = Bi ^((~Bo)& Bu ); \
+ Eso1 = Bo ^((~Bu)& Ba ); \
+ Esu1 = Bu ^((~Ba)& Be ); \
+/* Second half of the pass: the same sequence, now reading the E words and writing the A words. */ \
+ Cx = Ebu0^Egu0^Eku0^Emu0^Esu0; \
+ Du1 = Ebe1^Ege1^Eke1^Eme1^Ese1; \
+ Da0 = Cx^ROL32(Du1, 1); \
+ Cz = Ebu1^Egu1^Eku1^Emu1^Esu1; \
+ Du0 = Ebe0^Ege0^Eke0^Eme0^Ese0; \
+ Da1 = Cz^Du0; \
+\
+ Cw = Ebi0^Egi0^Eki0^Emi0^Esi0; \
+ Do0 = Cw^ROL32(Cz, 1); \
+ Cy = Ebi1^Egi1^Eki1^Emi1^Esi1; \
+ Do1 = Cy^Cx; \
+\
+ Cx = Eba0^Ega0^Eka0^Ema0^Esa0; \
+ De0 = Cx^ROL32(Cy, 1); \
+ Cz = Eba1^Ega1^Eka1^Ema1^Esa1; \
+ De1 = Cz^Cw; \
+\
+ Cy = Ebo1^Ego1^Eko1^Emo1^Eso1; \
+ Di0 = Du0^ROL32(Cy, 1); \
+ Cw = Ebo0^Ego0^Eko0^Emo0^Eso0; \
+ Di1 = Du1^Cw; \
+\
+ Du0 = Cw^ROL32(Cz, 1); \
+ Du1 = Cy^Cx; \
+\
+ Eba0 ^= Da0; \
+ Ba = Eba0; \
+ Ege0 ^= De0; \
+ Be = ROL32(Ege0, 22); \
+ Eki1 ^= Di1; \
+ Bi = ROL32(Eki1, 22); \
+ Emo1 ^= Do1; \
+ Bo = ROL32(Emo1, 11); \
+ Esu0 ^= Du0; \
+ Bu = ROL32(Esu0, 7); \
+ Aba0 = Ba ^((~Be)& Bi ); \
+ Aba0 ^= *(pRoundConstants++); \
+ Abe0 = Be ^((~Bi)& Bo ); \
+ Abi0 = Bi ^((~Bo)& Bu ); \
+ Abo0 = Bo ^((~Bu)& Ba ); \
+ Abu0 = Bu ^((~Ba)& Be ); \
+\
+ Ebo0 ^= Do0; \
+ Ba = ROL32(Ebo0, 14); \
+ Egu0 ^= Du0; \
+ Be = ROL32(Egu0, 10); \
+ Eka1 ^= Da1; \
+ Bi = ROL32(Eka1, 2); \
+ Eme1 ^= De1; \
+ Bo = ROL32(Eme1, 23); \
+ Esi1 ^= Di1; \
+ Bu = ROL32(Esi1, 31); \
+ Aga0 = Ba ^((~Be)& Bi ); \
+ Age0 = Be ^((~Bi)& Bo ); \
+ Agi0 = Bi ^((~Bo)& Bu ); \
+ Ago0 = Bo ^((~Bu)& Ba ); \
+ Agu0 = Bu ^((~Ba)& Be ); \
+\
+ Ebe1 ^= De1; \
+ Ba = ROL32(Ebe1, 1); \
+ Egi0 ^= Di0; \
+ Be = ROL32(Egi0, 3); \
+ Eko1 ^= Do1; \
+ Bi = ROL32(Eko1, 13); \
+ Emu0 ^= Du0; \
+ Bo = ROL32(Emu0, 4); \
+ Esa0 ^= Da0; \
+ Bu = ROL32(Esa0, 9); \
+ Aka0 = Ba ^((~Be)& Bi ); \
+ Ake0 = Be ^((~Bi)& Bo ); \
+ Aki0 = Bi ^((~Bo)& Bu ); \
+ Ako0 = Bo ^((~Bu)& Ba ); \
+ Aku0 = Bu ^((~Ba)& Be ); \
+\
+ Ebu1 ^= Du1; \
+ Ba = ROL32(Ebu1, 14); \
+ Ega0 ^= Da0; \
+ Be = ROL32(Ega0, 18); \
+ Eke0 ^= De0; \
+ Bi = ROL32(Eke0, 5); \
+ Emi1 ^= Di1; \
+ Bo = ROL32(Emi1, 8); \
+ Eso0 ^= Do0; \
+ Bu = ROL32(Eso0, 28); \
+ Ama0 = Ba ^((~Be)& Bi ); \
+ Ame0 = Be ^((~Bi)& Bo ); \
+ Ami0 = Bi ^((~Bo)& Bu ); \
+ Amo0 = Bo ^((~Bu)& Ba ); \
+ Amu0 = Bu ^((~Ba)& Be ); \
+\
+ Ebi0 ^= Di0; \
+ Ba = ROL32(Ebi0, 31); \
+ Ego1 ^= Do1; \
+ Be = ROL32(Ego1, 28); \
+ Eku1 ^= Du1; \
+ Bi = ROL32(Eku1, 20); \
+ Ema1 ^= Da1; \
+ Bo = ROL32(Ema1, 21); \
+ Ese0 ^= De0; \
+ Bu = ROL32(Ese0, 1); \
+ Asa0 = Ba ^((~Be)& Bi ); \
+ Ase0 = Be ^((~Bi)& Bo ); \
+ Asi0 = Bi ^((~Bo)& Bu ); \
+ Aso0 = Bo ^((~Bu)& Ba ); \
+ Asu0 = Bu ^((~Ba)& Be ); \
+\
+ Eba1 ^= Da1; \
+ Ba = Eba1; \
+ Ege1 ^= De1; \
+ Be = ROL32(Ege1, 22); \
+ Eki0 ^= Di0; \
+ Bi = ROL32(Eki0, 21); \
+ Emo0 ^= Do0; \
+ Bo = ROL32(Emo0, 10); \
+ Esu1 ^= Du1; \
+ Bu = ROL32(Esu1, 7); \
+ Aba1 = Ba ^((~Be)& Bi ); \
+ Aba1 ^= *(pRoundConstants++); \
+ Abe1 = Be ^((~Bi)& Bo ); \
+ Abi1 = Bi ^((~Bo)& Bu ); \
+ Abo1 = Bo ^((~Bu)& Ba ); \
+ Abu1 = Bu ^((~Ba)& Be ); \
+\
+ Ebo1 ^= Do1; \
+ Ba = ROL32(Ebo1, 14); \
+ Egu1 ^= Du1; \
+ Be = ROL32(Egu1, 10); \
+ Eka0 ^= Da0; \
+ Bi = ROL32(Eka0, 1); \
+ Eme0 ^= De0; \
+ Bo = ROL32(Eme0, 22); \
+ Esi0 ^= Di0; \
+ Bu = ROL32(Esi0, 30); \
+ Aga1 = Ba ^((~Be)& Bi ); \
+ Age1 = Be ^((~Bi)& Bo ); \
+ Agi1 = Bi ^((~Bo)& Bu ); \
+ Ago1 = Bo ^((~Bu)& Ba ); \
+ Agu1 = Bu ^((~Ba)& Be ); \
+\
+ Ebe0 ^= De0; \
+ Ba = Ebe0; \
+ Egi1 ^= Di1; \
+ Be = ROL32(Egi1, 3); \
+ Eko0 ^= Do0; \
+ Bi = ROL32(Eko0, 12); \
+ Emu1 ^= Du1; \
+ Bo = ROL32(Emu1, 4); \
+ Esa1 ^= Da1; \
+ Bu = ROL32(Esa1, 9); \
+ Aka1 = Ba ^((~Be)& Bi ); \
+ Ake1 = Be ^((~Bi)& Bo ); \
+ Aki1 = Bi ^((~Bo)& Bu ); \
+ Ako1 = Bo ^((~Bu)& Ba ); \
+ Aku1 = Bu ^((~Ba)& Be ); \
+\
+ Ebu0 ^= Du0; \
+ Ba = ROL32(Ebu0, 13); \
+ Ega1 ^= Da1; \
+ Be = ROL32(Ega1, 18); \
+ Eke1 ^= De1; \
+ Bi = ROL32(Eke1, 5); \
+ Emi0 ^= Di0; \
+ Bo = ROL32(Emi0, 7); \
+ Eso1 ^= Do1; \
+ Bu = ROL32(Eso1, 28); \
+ Ama1 = Ba ^((~Be)& Bi ); \
+ Ame1 = Be ^((~Bi)& Bo ); \
+ Ami1 = Bi ^((~Bo)& Bu ); \
+ Amo1 = Bo ^((~Bu)& Ba ); \
+ Amu1 = Bu ^((~Ba)& Be ); \
+\
+ Ebi1 ^= Di1; \
+ Ba = ROL32(Ebi1, 31); \
+ Ego0 ^= Do0; \
+ Be = ROL32(Ego0, 27); \
+ Eku0 ^= Du0; \
+ Bi = ROL32(Eku0, 19); \
+ Ema0 ^= Da0; \
+ Bo = ROL32(Ema0, 20); \
+ Ese1 ^= De1; \
+ Bu = ROL32(Ese1, 1); \
+ Asa1 = Ba ^((~Be)& Bi ); \
+ Ase1 = Be ^((~Bi)& Bo ); \
+ Asi1 = Bi ^((~Bo)& Bu ); \
+ Aso1 = Bo ^((~Bu)& Ba ); \
+ Asu1 = Bu ^((~Ba)& Be ); \
+ } \
+ copyToState(state, A) \
+}
+
+#define copyFromState(X, state) /* assigns state[0..49], in index order, to the 50 variables X##ba0, X##ba1, ..., X##su1 (the ...0 word of each pair sits at the even index) */ \
+ X##ba0 = state[ 0]; \
+ X##ba1 = state[ 1]; \
+ X##be0 = state[ 2]; \
+ X##be1 = state[ 3]; \
+ X##bi0 = state[ 4]; \
+ X##bi1 = state[ 5]; \
+ X##bo0 = state[ 6]; \
+ X##bo1 = state[ 7]; \
+ X##bu0 = state[ 8]; \
+ X##bu1 = state[ 9]; \
+ X##ga0 = state[10]; \
+ X##ga1 = state[11]; \
+ X##ge0 = state[12]; \
+ X##ge1 = state[13]; \
+ X##gi0 = state[14]; \
+ X##gi1 = state[15]; \
+ X##go0 = state[16]; \
+ X##go1 = state[17]; \
+ X##gu0 = state[18]; \
+ X##gu1 = state[19]; \
+ X##ka0 = state[20]; \
+ X##ka1 = state[21]; \
+ X##ke0 = state[22]; \
+ X##ke1 = state[23]; \
+ X##ki0 = state[24]; \
+ X##ki1 = state[25]; \
+ X##ko0 = state[26]; \
+ X##ko1 = state[27]; \
+ X##ku0 = state[28]; \
+ X##ku1 = state[29]; \
+ X##ma0 = state[30]; \
+ X##ma1 = state[31]; \
+ X##me0 = state[32]; \
+ X##me1 = state[33]; \
+ X##mi0 = state[34]; \
+ X##mi1 = state[35]; \
+ X##mo0 = state[36]; \
+ X##mo1 = state[37]; \
+ X##mu0 = state[38]; \
+ X##mu1 = state[39]; \
+ X##sa0 = state[40]; \
+ X##sa1 = state[41]; \
+ X##se0 = state[42]; \
+ X##se1 = state[43]; \
+ X##si0 = state[44]; \
+ X##si1 = state[45]; \
+ X##so0 = state[46]; \
+ X##so1 = state[47]; \
+ X##su0 = state[48]; \
+ X##su1 = state[49]; \
+
+#define copyToState(state, X) /* stores the 50 variables X##ba0, X##ba1, ..., X##su1 into state[0..49], using the same index order as copyFromState */ \
+ state[ 0] = X##ba0; \
+ state[ 1] = X##ba1; \
+ state[ 2] = X##be0; \
+ state[ 3] = X##be1; \
+ state[ 4] = X##bi0; \
+ state[ 5] = X##bi1; \
+ state[ 6] = X##bo0; \
+ state[ 7] = X##bo1; \
+ state[ 8] = X##bu0; \
+ state[ 9] = X##bu1; \
+ state[10] = X##ga0; \
+ state[11] = X##ga1; \
+ state[12] = X##ge0; \
+ state[13] = X##ge1; \
+ state[14] = X##gi0; \
+ state[15] = X##gi1; \
+ state[16] = X##go0; \
+ state[17] = X##go1; \
+ state[18] = X##gu0; \
+ state[19] = X##gu1; \
+ state[20] = X##ka0; \
+ state[21] = X##ka1; \
+ state[22] = X##ke0; \
+ state[23] = X##ke1; \
+ state[24] = X##ki0; \
+ state[25] = X##ki1; \
+ state[26] = X##ko0; \
+ state[27] = X##ko1; \
+ state[28] = X##ku0; \
+ state[29] = X##ku1; \
+ state[30] = X##ma0; \
+ state[31] = X##ma1; \
+ state[32] = X##me0; \
+ state[33] = X##me1; \
+ state[34] = X##mi0; \
+ state[35] = X##mi1; \
+ state[36] = X##mo0; \
+ state[37] = X##mo1; \
+ state[38] = X##mu0; \
+ state[39] = X##mu1; \
+ state[40] = X##sa0; \
+ state[41] = X##sa1; \
+ state[42] = X##se0; \
+ state[43] = X##se1; \
+ state[44] = X##si0; \
+ state[45] = X##si1; \
+ state[46] = X##so0; \
+ state[47] = X##so1; \
+ state[48] = X##su0; \
+ state[49] = X##su1; \
+
diff --git a/Modules/_sha3/keccak/KeccakF-1600-32-s1.macros b/Modules/_sha3/keccak/KeccakF-1600-32-s1.macros
new file mode 100644
index 0000000000..373d61df6e
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-32-s1.macros
@@ -0,0 +1,1187 @@
+/*
+Code automatically generated by KeccakTools!
+
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define declareABCDE /* declares the UINT32 working variables (A*, B*, C*, D*, E* families, each in a ...0 and a ...1 variant) that the round macros in this file read and write */ \
+ UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \
+ UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \
+ UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \
+ UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \
+ UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \
+ UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \
+ UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \
+ UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \
+ UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \
+ UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \
+ UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; \
+ UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \
+ UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \
+ UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \
+ UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \
+ UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \
+ UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \
+ UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \
+ UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \
+ UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \
+ UINT32 Ca0, Ce0, Ci0, Co0, Cu0; \
+ UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \
+ UINT32 Da0, De0, Di0, Do0, Du0; \
+ UINT32 Da1, De1, Di1, Do1, Du1; \
+ UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \
+ UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \
+ UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \
+ UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \
+ UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \
+ UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \
+ UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \
+ UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \
+ UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \
+ UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \
+
+#define prepareTheta /* sets each of the ten C words to the XOR of the five A words that share its vowel and 0/1 suffix; the round macros derive the D words from these */ \
+ Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \
+ Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \
+ Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \
+ Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \
+ Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \
+ Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \
+ Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \
+ Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \
+ Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \
+ Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \
+
+#ifdef UseBebigokimisa
+/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa'): i indexes the two round-constant tables, A is the name prefix of the input words (updated in place by the D XORs), E the prefix of the output words; the C words are re-accumulated from E as it is produced */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+/* E##ba0..E##bu0: this group also restarts the C..0 words by plain assignment and adds the ..._0[i] constant. */ \
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
+ Ca0 = E##ba0; \
+ E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \
+ Ce0 = E##be0; \
+ E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \
+ Ci0 = E##bi0; \
+ E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \
+ Co0 = E##bo0; \
+ E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \
+ Cu0 = E##bu0; \
+/* E##ba1..E##bu1: restarts the C..1 words and adds the ..._1[i] constant. */ \
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
+ Ca1 = E##ba1; \
+ E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \
+ Ce1 = E##be1; \
+ E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \
+ Ci1 = E##bi1; \
+ E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \
+ Co1 = E##bo1; \
+ E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \
+ Cu1 = E##bu1; \
+/* Remaining groups (g, k, m, s; each in a ...0 and a ...1 variant) XOR their E words into the C words. */ \
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \
+ Ca0 ^= E##ga0; \
+ E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \
+ Ce0 ^= E##ge0; \
+ E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \
+ Ci0 ^= E##gi0; \
+ E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \
+ Co0 ^= E##go0; \
+ E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \
+ Cu0 ^= E##gu0; \
+\
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \
+ Ca1 ^= E##ga1; \
+ E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \
+ Ce1 ^= E##ge1; \
+ E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \
+ Ci1 ^= E##gi1; \
+ E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \
+ Co1 ^= E##go1; \
+ E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \
+ Cu1 ^= E##gu1; \
+\
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ka0 = Bka0 ^( Bke0 | Bki0 ); \
+ Ca0 ^= E##ka0; \
+ E##ke0 = Bke0 ^( Bki0 & Bko0 ); \
+ Ce0 ^= E##ke0; \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ Ci0 ^= E##ki0; \
+ E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \
+ Co0 ^= E##ko0; \
+ E##ku0 = Bku0 ^( Bka0 & Bke0 ); \
+ Cu0 ^= E##ku0; \
+\
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ka1 = Bka1 ^( Bke1 | Bki1 ); \
+ Ca1 ^= E##ka1; \
+ E##ke1 = Bke1 ^( Bki1 & Bko1 ); \
+ Ce1 ^= E##ke1; \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ Ci1 ^= E##ki1; \
+ E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \
+ Co1 ^= E##ko1; \
+ E##ku1 = Bku1 ^( Bka1 & Bke1 ); \
+ Cu1 ^= E##ku1; \
+\
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \
+ Ca0 ^= E##ma0; \
+ E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \
+ Ce0 ^= E##me0; \
+ E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \
+ Ci0 ^= E##mi0; \
+ E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \
+ Co0 ^= E##mo0; \
+ E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \
+ Cu0 ^= E##mu0; \
+\
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \
+ Ca1 ^= E##ma1; \
+ E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \
+ Ce1 ^= E##me1; \
+ E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \
+ Ci1 ^= E##mi1; \
+ E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \
+ Co1 ^= E##mo1; \
+ E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \
+ Cu1 ^= E##mu1; \
+\
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ Ca0 ^= E##sa0; \
+ E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \
+ Ce0 ^= E##se0; \
+ E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \
+ Ci0 ^= E##si0; \
+ E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \
+ Co0 ^= E##so0; \
+ E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \
+ Cu0 ^= E##su0; \
+\
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ Ca1 ^= E##sa1; \
+ E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \
+ Ce1 ^= E##se1; \
+ E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \
+ Ci1 ^= E##si1; \
+ E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \
+ Co1 ^= E##so1; \
+ E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \
+ Cu1 ^= E##su1; \
+\
+
+/* --- Code for round (lane complementing pattern 'bebigokimisa'): i indexes the two round-constant tables, A is the name prefix of the input words (updated in place by the D XORs), E the prefix of the output words; the C words are read but not updated */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+/* Each group below XORs D words into five A words, moves them (rotated where a rotation applies) into B words, and combines those into five E words. */ \
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
+ E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \
+ E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \
+ E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \
+ E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \
+\
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
+ E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \
+ E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \
+ E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \
+ E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \
+\
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \
+ E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \
+ E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \
+ E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \
+ E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \
+\
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \
+ E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \
+ E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \
+ E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \
+ E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \
+\
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ka0 = Bka0 ^( Bke0 | Bki0 ); \
+ E##ke0 = Bke0 ^( Bki0 & Bko0 ); \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \
+ E##ku0 = Bku0 ^( Bka0 & Bke0 ); \
+\
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ka1 = Bka1 ^( Bke1 | Bki1 ); \
+ E##ke1 = Bke1 ^( Bki1 & Bko1 ); \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \
+ E##ku1 = Bku1 ^( Bka1 & Bke1 ); \
+\
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \
+ E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \
+ E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \
+ E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \
+ E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \
+\
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \
+ E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \
+ E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \
+ E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \
+ E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \
+\
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \
+ E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \
+ E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \
+ E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \
+\
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \
+ E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \
+ E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \
+ E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \
+\
+
+#else /* UseBebigokimisa */
+/* --- Code for round, with prepare-theta: i indexes the two round-constant tables, A is the name prefix of the input words (updated in place by the D XORs), E the prefix of the output words; the C words are re-accumulated from E as it is produced */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+/* E##ba0..E##bu0: this group also restarts the C..0 words by plain assignment and adds the ..._0[i] constant. */ \
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
+ Ca0 = E##ba0; \
+ E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \
+ Ce0 = E##be0; \
+ E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \
+ Ci0 = E##bi0; \
+ E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \
+ Co0 = E##bo0; \
+ E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \
+ Cu0 = E##bu0; \
+/* E##ba1..E##bu1: restarts the C..1 words and adds the ..._1[i] constant. */ \
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
+ Ca1 = E##ba1; \
+ E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \
+ Ce1 = E##be1; \
+ E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \
+ Ci1 = E##bi1; \
+ E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \
+ Co1 = E##bo1; \
+ E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \
+ Cu1 = E##bu1; \
+/* Remaining groups (g, k, m, s; each in a ...0 and a ...1 variant) XOR their E words into the C words. */ \
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \
+ Ca0 ^= E##ga0; \
+ E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \
+ Ce0 ^= E##ge0; \
+ E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \
+ Ci0 ^= E##gi0; \
+ E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \
+ Co0 ^= E##go0; \
+ E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \
+ Cu0 ^= E##gu0; \
+\
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \
+ Ca1 ^= E##ga1; \
+ E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \
+ Ce1 ^= E##ge1; \
+ E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \
+ Ci1 ^= E##gi1; \
+ E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \
+ Co1 ^= E##go1; \
+ E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \
+ Cu1 ^= E##gu1; \
+\
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \
+ Ca0 ^= E##ka0; \
+ E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \
+ Ce0 ^= E##ke0; \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ Ci0 ^= E##ki0; \
+ E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \
+ Co0 ^= E##ko0; \
+ E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \
+ Cu0 ^= E##ku0; \
+\
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \
+ Ca1 ^= E##ka1; \
+ E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \
+ Ce1 ^= E##ke1; \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ Ci1 ^= E##ki1; \
+ E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \
+ Co1 ^= E##ko1; \
+ E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \
+ Cu1 ^= E##ku1; \
+\
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \
+ Ca0 ^= E##ma0; \
+ E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \
+ Ce0 ^= E##me0; \
+ E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \
+ Ci0 ^= E##mi0; \
+ E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \
+ Co0 ^= E##mo0; \
+ E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \
+ Cu0 ^= E##mu0; \
+\
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \
+ Ca1 ^= E##ma1; \
+ E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \
+ Ce1 ^= E##me1; \
+ E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \
+ Ci1 ^= E##mi1; \
+ E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \
+ Co1 ^= E##mo1; \
+ E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \
+ Cu1 ^= E##mu1; \
+\
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ Ca0 ^= E##sa0; \
+ E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \
+ Ce0 ^= E##se0; \
+ E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \
+ Ci0 ^= E##si0; \
+ E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \
+ Co0 ^= E##so0; \
+ E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \
+ Cu0 ^= E##su0; \
+\
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ Ca1 ^= E##sa1; \
+ E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \
+ Ce1 ^= E##se1; \
+ E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \
+ Ci1 ^= E##si1; \
+ E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \
+ Co1 ^= E##so1; \
+ E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \
+ Cu1 ^= E##su1; \
+\
+
+/* --- Code for round */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
+ *
+ * thetaRhoPiChiIota(i, A, E)
+ *   i  index into KeccakF1600RoundConstants_int2_0 / _int2_1; the selected
+ *      entries are XORed into E##ba0 and E##ba1 respectively.
+ *   A  name prefix of the input words; every A-prefixed word is modified in
+ *      place (XORed with one of the D words) before it is rotated.
+ *   E  name prefix of the output words; all 50 E-prefixed words are written.
+ *
+ * Reads Ca0..Cu1 (never writes them), overwrites Da0..Du1 and all B*
+ * temporaries. ROL32 is defined outside this file (presumably a 32-bit
+ * rotate-left -- confirm). Expands to a bare sequence of statements with no
+ * enclosing braces.
+ */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); /* D words are derived from the C words only */ \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+/* -> E##ba0..E##bu0 */ \
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; /* round constant, word 0 */ \
+ E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \
+ E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \
+ E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \
+ E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \
+/* -> E##ba1..E##bu1 */ \
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; /* round constant, word 1 */ \
+ E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \
+ E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \
+ E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \
+ E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \
+/* -> E##ga0..E##gu0 */ \
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \
+ E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \
+ E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \
+ E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \
+ E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \
+/* -> E##ga1..E##gu1 */ \
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \
+ E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \
+ E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \
+ E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \
+ E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \
+/* -> E##ka0..E##ku0 */ \
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \
+ E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \
+ E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \
+/* -> E##ka1..E##ku1 */ \
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \
+ E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \
+ E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \
+/* -> E##ma0..E##mu0 */ \
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \
+ E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \
+ E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \
+ E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \
+ E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \
+/* -> E##ma1..E##mu1 */ \
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \
+ E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \
+ E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \
+ E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \
+ E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \
+/* -> E##sa0..E##su0 */ \
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \
+ E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \
+ E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \
+ E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \
+/* -> E##sa1..E##su1 */ \
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \
+ E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \
+ E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \
+ E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \
+\
+
+#endif /* UseBebigokimisa */
+
+const UINT32 KeccakF1600RoundConstants_int2_0[24] = { /* indexed by the round macros' i argument; entry is XORed into E##ba0 */
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000000UL }; /* NOTE(review): presumably the word-0 halves of the interleaved 64-bit round constants -- confirm against the generator */
+
+const UINT32 KeccakF1600RoundConstants_int2_1[24] = { /* indexed by the round macros' i argument; entry is XORed into E##ba1 */
+ 0x00000000UL,
+ 0x00000089UL,
+ 0x8000008bUL,
+ 0x80008080UL,
+ 0x0000008bUL,
+ 0x00008000UL,
+ 0x80008088UL,
+ 0x80000082UL,
+ 0x0000000bUL,
+ 0x0000000aUL,
+ 0x00008082UL,
+ 0x00008003UL,
+ 0x0000808bUL,
+ 0x8000000bUL,
+ 0x8000008aUL,
+ 0x80000081UL,
+ 0x80000081UL,
+ 0x80000008UL,
+ 0x00000083UL,
+ 0x80008003UL,
+ 0x80008088UL,
+ 0x80000088UL,
+ 0x00008000UL,
+ 0x80008082UL }; /* NOTE(review): presumably the word-1 halves of the interleaved 64-bit round constants -- confirm against the generator */
+
+#define copyFromStateAndXor1024bits(X, state, input) /* X: variable-name prefix; state, input: indexable word arrays; expands to bare statements */ \
+ X##ba0 = state[ 0]^input[ 0]; /* words 0..31 are loaded as state XOR input */ \
+ X##ba1 = state[ 1]^input[ 1]; \
+ X##be0 = state[ 2]^input[ 2]; \
+ X##be1 = state[ 3]^input[ 3]; \
+ X##bi0 = state[ 4]^input[ 4]; \
+ X##bi1 = state[ 5]^input[ 5]; \
+ X##bo0 = state[ 6]^input[ 6]; \
+ X##bo1 = state[ 7]^input[ 7]; \
+ X##bu0 = state[ 8]^input[ 8]; \
+ X##bu1 = state[ 9]^input[ 9]; \
+ X##ga0 = state[10]^input[10]; \
+ X##ga1 = state[11]^input[11]; \
+ X##ge0 = state[12]^input[12]; \
+ X##ge1 = state[13]^input[13]; \
+ X##gi0 = state[14]^input[14]; \
+ X##gi1 = state[15]^input[15]; \
+ X##go0 = state[16]^input[16]; \
+ X##go1 = state[17]^input[17]; \
+ X##gu0 = state[18]^input[18]; \
+ X##gu1 = state[19]^input[19]; \
+ X##ka0 = state[20]^input[20]; \
+ X##ka1 = state[21]^input[21]; \
+ X##ke0 = state[22]^input[22]; \
+ X##ke1 = state[23]^input[23]; \
+ X##ki0 = state[24]^input[24]; \
+ X##ki1 = state[25]^input[25]; \
+ X##ko0 = state[26]^input[26]; \
+ X##ko1 = state[27]^input[27]; \
+ X##ku0 = state[28]^input[28]; \
+ X##ku1 = state[29]^input[29]; \
+ X##ma0 = state[30]^input[30]; \
+ X##ma1 = state[31]^input[31]; /* last word that absorbs input (32 words; 1024 bits assuming 32-bit words) */ \
+ X##me0 = state[32]; /* words 32..49 are copied from state unchanged; input is not read past index 31 */ \
+ X##me1 = state[33]; \
+ X##mi0 = state[34]; \
+ X##mi1 = state[35]; \
+ X##mo0 = state[36]; \
+ X##mo1 = state[37]; \
+ X##mu0 = state[38]; \
+ X##mu1 = state[39]; \
+ X##sa0 = state[40]; \
+ X##sa1 = state[41]; \
+ X##se0 = state[42]; \
+ X##se1 = state[43]; \
+ X##si0 = state[44]; \
+ X##si1 = state[45]; \
+ X##so0 = state[46]; \
+ X##so1 = state[47]; \
+ X##su0 = state[48]; \
+ X##su1 = state[49]; \
+
+#define copyFromStateAndXor1088bits(X, state, input) /* X: variable-name prefix; state, input: indexable word arrays; expands to bare statements */ \
+ X##ba0 = state[ 0]^input[ 0]; /* words 0..33 are loaded as state XOR input */ \
+ X##ba1 = state[ 1]^input[ 1]; \
+ X##be0 = state[ 2]^input[ 2]; \
+ X##be1 = state[ 3]^input[ 3]; \
+ X##bi0 = state[ 4]^input[ 4]; \
+ X##bi1 = state[ 5]^input[ 5]; \
+ X##bo0 = state[ 6]^input[ 6]; \
+ X##bo1 = state[ 7]^input[ 7]; \
+ X##bu0 = state[ 8]^input[ 8]; \
+ X##bu1 = state[ 9]^input[ 9]; \
+ X##ga0 = state[10]^input[10]; \
+ X##ga1 = state[11]^input[11]; \
+ X##ge0 = state[12]^input[12]; \
+ X##ge1 = state[13]^input[13]; \
+ X##gi0 = state[14]^input[14]; \
+ X##gi1 = state[15]^input[15]; \
+ X##go0 = state[16]^input[16]; \
+ X##go1 = state[17]^input[17]; \
+ X##gu0 = state[18]^input[18]; \
+ X##gu1 = state[19]^input[19]; \
+ X##ka0 = state[20]^input[20]; \
+ X##ka1 = state[21]^input[21]; \
+ X##ke0 = state[22]^input[22]; \
+ X##ke1 = state[23]^input[23]; \
+ X##ki0 = state[24]^input[24]; \
+ X##ki1 = state[25]^input[25]; \
+ X##ko0 = state[26]^input[26]; \
+ X##ko1 = state[27]^input[27]; \
+ X##ku0 = state[28]^input[28]; \
+ X##ku1 = state[29]^input[29]; \
+ X##ma0 = state[30]^input[30]; \
+ X##ma1 = state[31]^input[31]; \
+ X##me0 = state[32]^input[32]; \
+ X##me1 = state[33]^input[33]; /* last word that absorbs input (34 words; 1088 bits assuming 32-bit words) */ \
+ X##mi0 = state[34]; /* words 34..49 are copied from state unchanged; input is not read past index 33 */ \
+ X##mi1 = state[35]; \
+ X##mo0 = state[36]; \
+ X##mo1 = state[37]; \
+ X##mu0 = state[38]; \
+ X##mu1 = state[39]; \
+ X##sa0 = state[40]; \
+ X##sa1 = state[41]; \
+ X##se0 = state[42]; \
+ X##se1 = state[43]; \
+ X##si0 = state[44]; \
+ X##si1 = state[45]; \
+ X##so0 = state[46]; \
+ X##so1 = state[47]; \
+ X##su0 = state[48]; \
+ X##su1 = state[49]; \
+
+#define copyFromState(X, state) /* loads state[0..49] into the 50 X-prefixed variables (order ba0, ba1, be0, ... su1); expands to bare statements */ \
+ X##ba0 = state[ 0]; \
+ X##ba1 = state[ 1]; \
+ X##be0 = state[ 2]; \
+ X##be1 = state[ 3]; \
+ X##bi0 = state[ 4]; \
+ X##bi1 = state[ 5]; \
+ X##bo0 = state[ 6]; \
+ X##bo1 = state[ 7]; \
+ X##bu0 = state[ 8]; \
+ X##bu1 = state[ 9]; \
+ X##ga0 = state[10]; \
+ X##ga1 = state[11]; \
+ X##ge0 = state[12]; \
+ X##ge1 = state[13]; \
+ X##gi0 = state[14]; \
+ X##gi1 = state[15]; \
+ X##go0 = state[16]; \
+ X##go1 = state[17]; \
+ X##gu0 = state[18]; \
+ X##gu1 = state[19]; \
+ X##ka0 = state[20]; \
+ X##ka1 = state[21]; \
+ X##ke0 = state[22]; \
+ X##ke1 = state[23]; \
+ X##ki0 = state[24]; \
+ X##ki1 = state[25]; \
+ X##ko0 = state[26]; \
+ X##ko1 = state[27]; \
+ X##ku0 = state[28]; \
+ X##ku1 = state[29]; \
+ X##ma0 = state[30]; \
+ X##ma1 = state[31]; \
+ X##me0 = state[32]; \
+ X##me1 = state[33]; \
+ X##mi0 = state[34]; \
+ X##mi1 = state[35]; \
+ X##mo0 = state[36]; \
+ X##mo1 = state[37]; \
+ X##mu0 = state[38]; \
+ X##mu1 = state[39]; \
+ X##sa0 = state[40]; \
+ X##sa1 = state[41]; \
+ X##se0 = state[42]; \
+ X##se1 = state[43]; \
+ X##si0 = state[44]; \
+ X##si1 = state[45]; \
+ X##so0 = state[46]; \
+ X##so1 = state[47]; \
+ X##su0 = state[48]; \
+ X##su1 = state[49]; \
+
+#define copyToState(state, X) /* stores the 50 X-prefixed variables into state[0..49], same word order as copyFromState; note the argument order (state first) */ \
+ state[ 0] = X##ba0; \
+ state[ 1] = X##ba1; \
+ state[ 2] = X##be0; \
+ state[ 3] = X##be1; \
+ state[ 4] = X##bi0; \
+ state[ 5] = X##bi1; \
+ state[ 6] = X##bo0; \
+ state[ 7] = X##bo1; \
+ state[ 8] = X##bu0; \
+ state[ 9] = X##bu1; \
+ state[10] = X##ga0; \
+ state[11] = X##ga1; \
+ state[12] = X##ge0; \
+ state[13] = X##ge1; \
+ state[14] = X##gi0; \
+ state[15] = X##gi1; \
+ state[16] = X##go0; \
+ state[17] = X##go1; \
+ state[18] = X##gu0; \
+ state[19] = X##gu1; \
+ state[20] = X##ka0; \
+ state[21] = X##ka1; \
+ state[22] = X##ke0; \
+ state[23] = X##ke1; \
+ state[24] = X##ki0; \
+ state[25] = X##ki1; \
+ state[26] = X##ko0; \
+ state[27] = X##ko1; \
+ state[28] = X##ku0; \
+ state[29] = X##ku1; \
+ state[30] = X##ma0; \
+ state[31] = X##ma1; \
+ state[32] = X##me0; \
+ state[33] = X##me1; \
+ state[34] = X##mi0; \
+ state[35] = X##mi1; \
+ state[36] = X##mo0; \
+ state[37] = X##mo1; \
+ state[38] = X##mu0; \
+ state[39] = X##mu1; \
+ state[40] = X##sa0; \
+ state[41] = X##sa1; \
+ state[42] = X##se0; \
+ state[43] = X##se1; \
+ state[44] = X##si0; \
+ state[45] = X##si1; \
+ state[46] = X##so0; \
+ state[47] = X##so1; \
+ state[48] = X##su0; \
+ state[49] = X##su1; \
+
+#define copyStateVariables(X, Y) /* assigns each of the 50 Y-prefixed variables to the same-suffixed X-prefixed variable (X is the destination) */ \
+ X##ba0 = Y##ba0; \
+ X##ba1 = Y##ba1; \
+ X##be0 = Y##be0; \
+ X##be1 = Y##be1; \
+ X##bi0 = Y##bi0; \
+ X##bi1 = Y##bi1; \
+ X##bo0 = Y##bo0; \
+ X##bo1 = Y##bo1; \
+ X##bu0 = Y##bu0; \
+ X##bu1 = Y##bu1; \
+ X##ga0 = Y##ga0; \
+ X##ga1 = Y##ga1; \
+ X##ge0 = Y##ge0; \
+ X##ge1 = Y##ge1; \
+ X##gi0 = Y##gi0; \
+ X##gi1 = Y##gi1; \
+ X##go0 = Y##go0; \
+ X##go1 = Y##go1; \
+ X##gu0 = Y##gu0; \
+ X##gu1 = Y##gu1; \
+ X##ka0 = Y##ka0; \
+ X##ka1 = Y##ka1; \
+ X##ke0 = Y##ke0; \
+ X##ke1 = Y##ke1; \
+ X##ki0 = Y##ki0; \
+ X##ki1 = Y##ki1; \
+ X##ko0 = Y##ko0; \
+ X##ko1 = Y##ko1; \
+ X##ku0 = Y##ku0; \
+ X##ku1 = Y##ku1; \
+ X##ma0 = Y##ma0; \
+ X##ma1 = Y##ma1; \
+ X##me0 = Y##me0; \
+ X##me1 = Y##me1; \
+ X##mi0 = Y##mi0; \
+ X##mi1 = Y##mi1; \
+ X##mo0 = Y##mo0; \
+ X##mo1 = Y##mo1; \
+ X##mu0 = Y##mu0; \
+ X##mu1 = Y##mu1; \
+ X##sa0 = Y##sa0; \
+ X##sa1 = Y##sa1; \
+ X##se0 = Y##se0; \
+ X##se1 = Y##se1; \
+ X##si0 = Y##si0; \
+ X##si1 = Y##si1; \
+ X##so0 = Y##so0; \
+ X##so1 = Y##so1; \
+ X##su0 = Y##su0; \
+ X##su1 = Y##su1; \
+
diff --git a/Modules/_sha3/keccak/KeccakF-1600-32-s2.macros b/Modules/_sha3/keccak/KeccakF-1600-32-s2.macros
new file mode 100644
index 0000000000..fa1176219a
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-32-s2.macros
@@ -0,0 +1,1187 @@
+/*
+Code automatically generated by KeccakTools!
+
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define declareABCDE /* declares, as UINT32, every A/B/C/D/E working variable named by the macros in this file; must expand where declarations are allowed */ \
+ UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; /* A*: 25 lane names x 2 words (suffix 0 / 1) */ \
+ UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \
+ UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \
+ UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \
+ UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \
+ UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \
+ UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \
+ UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \
+ UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \
+ UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \
+ UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; /* B*: temporaries assigned inside the round macros */ \
+ UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \
+ UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \
+ UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \
+ UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \
+ UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \
+ UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \
+ UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \
+ UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \
+ UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \
+ UINT32 Ca0, Ce0, Ci0, Co0, Cu0; /* C*: column words set by prepareTheta and the PrepareTheta round macros */ \
+ UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \
+ UINT32 Da0, De0, Di0, Do0, Du0; /* D*: words computed from C* at the start of each round macro */ \
+ UINT32 Da1, De1, Di1, Do1, Du1; \
+ UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; /* E*: second 50-word set, same naming as A* */ \
+ UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \
+ UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \
+ UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \
+ UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \
+ UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \
+ UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \
+ UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \
+ UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \
+ UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \
+
+#define prepareTheta /* sets each C word to the XOR of the five A words sharing its vowel and word index; hard-wired to the A prefix (takes no arguments) */ \
+ Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \
+ Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \
+ Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \
+ Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \
+ Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \
+ Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \
+ Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \
+ Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \
+ Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \
+ Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \
+
+#ifdef UseBebigokimisa
+/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
+ *
+ * thetaRhoPiChiIotaPrepareTheta(i, A, E)
+ *   i  index into KeccakF1600RoundConstants_int2_0 / _int2_1; the selected
+ *      entries are XORed into E##ba0 and E##ba1 respectively.
+ *   A  name prefix of the input words; every A-prefixed word is modified in
+ *      place (XORed with one of the D words) before it is rotated.
+ *   E  name prefix of the output words; all 50 E-prefixed words are written.
+ *
+ * Ca0..Cu1 are consumed first (to form Da0..Du1) and then rebuilt from the
+ * freshly written E words: the two E##b* groups assign C = E, the remaining
+ * groups XOR their E word into C. In this branch the E expressions mix
+ * AND / OR / NOT per word instead of the uniform B ^ ((~B) & B) form of the
+ * #else branch. Expands to a bare sequence of statements with no braces.
+ */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); /* D words are derived from the incoming C words */ \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+/* -> E##ba0..E##bu0 */ \
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; /* round constant, word 0 */ \
+ Ca0 = E##ba0; /* first group (re)starts the column word ... */ \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \
+ Ce0 = E##be0; \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \
+ Ci0 = E##bi0; \
+ E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \
+ Co0 = E##bo0; \
+ E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \
+ Cu0 = E##bu0; \
+/* -> E##ba1..E##bu1 */ \
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; /* round constant, word 1 */ \
+ Ca1 = E##ba1; \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \
+ Ce1 = E##be1; \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \
+ Ci1 = E##bi1; \
+ E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \
+ Co1 = E##bo1; \
+ E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \
+ Cu1 = E##bu1; \
+/* -> E##ga0..E##gu0 */ \
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \
+ Ca0 ^= E##ga0; /* ... and every later group accumulates into it */ \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \
+ Ce0 ^= E##ge0; \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \
+ Ci0 ^= E##gi0; \
+ E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \
+ Co0 ^= E##go0; \
+ E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \
+ Cu0 ^= E##gu0; \
+/* -> E##ga1..E##gu1 */ \
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \
+ Ca1 ^= E##ga1; \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \
+ Ce1 ^= E##ge1; \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \
+ Ci1 ^= E##gi1; \
+ E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \
+ Co1 ^= E##go1; \
+ E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \
+ Cu1 ^= E##gu1; \
+/* -> E##ka0..E##ku0 */ \
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ E##ka0 = Bka0 ^( Bke0 | Bki0 ); \
+ Ca0 ^= E##ka0; \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ E##ke0 = Bke0 ^( Bki0 & Bko0 ); \
+ Ce0 ^= E##ke0; \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ Ci0 ^= E##ki0; \
+ E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \
+ Co0 ^= E##ko0; \
+ E##ku0 = Bku0 ^( Bka0 & Bke0 ); \
+ Cu0 ^= E##ku0; \
+/* -> E##ka1..E##ku1 */ \
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ E##ka1 = Bka1 ^( Bke1 | Bki1 ); \
+ Ca1 ^= E##ka1; \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ E##ke1 = Bke1 ^( Bki1 & Bko1 ); \
+ Ce1 ^= E##ke1; \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ Ci1 ^= E##ki1; \
+ E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \
+ Co1 ^= E##ko1; \
+ E##ku1 = Bku1 ^( Bka1 & Bke1 ); \
+ Cu1 ^= E##ku1; \
+/* -> E##ma0..E##mu0 */ \
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \
+ Ca0 ^= E##ma0; \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \
+ Ce0 ^= E##me0; \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \
+ Ci0 ^= E##mi0; \
+ E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \
+ Co0 ^= E##mo0; \
+ E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \
+ Cu0 ^= E##mu0; \
+/* -> E##ma1..E##mu1 */ \
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \
+ Ca1 ^= E##ma1; \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \
+ Ce1 ^= E##me1; \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \
+ Ci1 ^= E##mi1; \
+ E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \
+ Co1 ^= E##mo1; \
+ E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \
+ Cu1 ^= E##mu1; \
+/* -> E##sa0..E##su0 */ \
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ Ca0 ^= E##sa0; \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \
+ Ce0 ^= E##se0; \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \
+ Ci0 ^= E##si0; \
+ E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \
+ Co0 ^= E##so0; \
+ E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \
+ Cu0 ^= E##su0; \
+/* -> E##sa1..E##su1 */ \
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ Ca1 ^= E##sa1; \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \
+ Ce1 ^= E##se1; \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \
+ Ci1 ^= E##si1; \
+ E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \
+ Co1 ^= E##so1; \
+ E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \
+ Cu1 ^= E##su1; \
+\
+
+/* --- Code for round (lane complementing pattern 'bebigokimisa') */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
+ *
+ * thetaRhoPiChiIota(i, A, E)
+ *   i  index into KeccakF1600RoundConstants_int2_0 / _int2_1; the selected
+ *      entries are XORed into E##ba0 and E##ba1 respectively.
+ *   A  name prefix of the input words; every A-prefixed word is modified in
+ *      place (XORed with one of the D words) before it is rotated.
+ *   E  name prefix of the output words; all 50 E-prefixed words are written.
+ *
+ * Same A/B/E statements as the thetaRhoPiChiIotaPrepareTheta macro of this
+ * branch, minus every assignment to Ca0..Cu1: the C words are only read here.
+ * Expands to a bare sequence of statements with no enclosing braces.
+ */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); /* D words are derived from the C words only */ \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+/* -> E##ba0..E##bu0 */ \
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; /* round constant, word 0 */ \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \
+ E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \
+ E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \
+/* -> E##ba1..E##bu1 */ \
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; /* round constant, word 1 */ \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \
+ E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \
+ E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \
+/* -> E##ga0..E##gu0 */ \
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \
+ E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \
+ E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \
+/* -> E##ga1..E##gu1 */ \
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \
+ E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \
+ E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \
+/* -> E##ka0..E##ku0 */ \
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ E##ka0 = Bka0 ^( Bke0 | Bki0 ); \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ E##ke0 = Bke0 ^( Bki0 & Bko0 ); \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \
+ E##ku0 = Bku0 ^( Bka0 & Bke0 ); \
+/* -> E##ka1..E##ku1 */ \
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ E##ka1 = Bka1 ^( Bke1 | Bki1 ); \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ E##ke1 = Bke1 ^( Bki1 & Bko1 ); \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \
+ E##ku1 = Bku1 ^( Bka1 & Bke1 ); \
+/* -> E##ma0..E##mu0 */ \
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \
+ E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \
+ E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \
+/* -> E##ma1..E##mu1 */ \
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \
+ E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \
+ E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \
+/* -> E##sa0..E##su0 */ \
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \
+ E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \
+ E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \
+/* -> E##sa1..E##su1 */ \
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \
+ E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \
+ E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \
+\
+
+#else /* UseBebigokimisa */
+/* --- Code for round, with prepare-theta */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
+ *
+ * thetaRhoPiChiIotaPrepareTheta(i, A, E)  [variant used when UseBebigokimisa
+ * is not defined]
+ *   i  index into KeccakF1600RoundConstants_int2_0 / _int2_1; the selected
+ *      entries are XORed into E##ba0 and E##ba1 respectively.
+ *   A  name prefix of the input words; every A-prefixed word is modified in
+ *      place (XORed with one of the D words) before it is rotated.
+ *   E  name prefix of the output words; all 50 E-prefixed words are written.
+ *
+ * Ca0..Cu1 are consumed first (to form Da0..Du1) and then rebuilt from the
+ * freshly written E words: the two E##b* groups assign C = E, the remaining
+ * groups XOR their E word into C. Every E word uses the same
+ * B ^ ((~B) & B) form. Expands to a bare sequence of statements, no braces.
+ */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); /* D words are derived from the incoming C words */ \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+/* -> E##ba0..E##bu0 */ \
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; /* round constant, word 0 */ \
+ Ca0 = E##ba0; /* first group (re)starts the column word ... */ \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \
+ Ce0 = E##be0; \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \
+ Ci0 = E##bi0; \
+ E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \
+ Co0 = E##bo0; \
+ E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \
+ Cu0 = E##bu0; \
+/* -> E##ba1..E##bu1 */ \
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; /* round constant, word 1 */ \
+ Ca1 = E##ba1; \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \
+ Ce1 = E##be1; \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \
+ Ci1 = E##bi1; \
+ E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \
+ Co1 = E##bo1; \
+ E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \
+ Cu1 = E##bu1; \
+/* -> E##ga0..E##gu0 */ \
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \
+ Ca0 ^= E##ga0; /* ... and every later group accumulates into it */ \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \
+ Ce0 ^= E##ge0; \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \
+ Ci0 ^= E##gi0; \
+ E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \
+ Co0 ^= E##go0; \
+ E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \
+ Cu0 ^= E##gu0; \
+/* -> E##ga1..E##gu1 */ \
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \
+ Ca1 ^= E##ga1; \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \
+ Ce1 ^= E##ge1; \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \
+ Ci1 ^= E##gi1; \
+ E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \
+ Co1 ^= E##go1; \
+ E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \
+ Cu1 ^= E##gu1; \
+/* -> E##ka0..E##ku0 */ \
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \
+ Ca0 ^= E##ka0; \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \
+ Ce0 ^= E##ke0; \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ Ci0 ^= E##ki0; \
+ E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \
+ Co0 ^= E##ko0; \
+ E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \
+ Cu0 ^= E##ku0; \
+/* -> E##ka1..E##ku1 */ \
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \
+ Ca1 ^= E##ka1; \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \
+ Ce1 ^= E##ke1; \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ Ci1 ^= E##ki1; \
+ E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \
+ Co1 ^= E##ko1; \
+ E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \
+ Cu1 ^= E##ku1; \
+/* -> E##ma0..E##mu0 */ \
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \
+ Ca0 ^= E##ma0; \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \
+ Ce0 ^= E##me0; \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \
+ Ci0 ^= E##mi0; \
+ E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \
+ Co0 ^= E##mo0; \
+ E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \
+ Cu0 ^= E##mu0; \
+/* -> E##ma1..E##mu1 */ \
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \
+ Ca1 ^= E##ma1; \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \
+ Ce1 ^= E##me1; \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \
+ Ci1 ^= E##mi1; \
+ E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \
+ Co1 ^= E##mo1; \
+ E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \
+ Cu1 ^= E##mu1; \
+/* -> E##sa0..E##su0 */ \
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ Ca0 ^= E##sa0; \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \
+ Ce0 ^= E##se0; \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \
+ Ci0 ^= E##si0; \
+ E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \
+ Co0 ^= E##so0; \
+ E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \
+ Cu0 ^= E##su0; \
+/* -> E##sa1..E##su1 */ \
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ Ca1 ^= E##sa1; \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \
+ Ce1 ^= E##se1; \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \
+ Ci1 ^= E##si1; \
+ E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \
+ Co1 ^= E##so1; \
+ E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \
+ Cu1 ^= E##su1; \
+\
+
+/* --- Code for round i: derives the D words from the C words, XORs D into the A words (A is modified), rotates them into B, and writes the chi result to the E words; E##ba0/E##ba1 additionally get KeccakF1600RoundConstants_int2_0[i]/_int2_1[i]. The C words are only read here, never assigned. */
+/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da0 = Cu0^ROL32(Ce1, 1); \
+ Da1 = Cu1^Ce0; \
+ De0 = Ca0^ROL32(Ci1, 1); \
+ De1 = Ca1^Ci0; \
+ Di0 = Ce0^ROL32(Co1, 1); \
+ Di1 = Ce1^Co0; \
+ Do0 = Ci0^ROL32(Cu1, 1); \
+ Do1 = Ci1^Cu0; \
+ Du0 = Co0^ROL32(Ca1, 1); \
+ Du1 = Co1^Ca0; \
+\
+ A##ba0 ^= Da0; \
+ Bba0 = A##ba0; \
+ A##ge0 ^= De0; \
+ Bbe0 = ROL32(A##ge0, 22); \
+ A##ki1 ^= Di1; \
+ Bbi0 = ROL32(A##ki1, 22); \
+ E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
+ A##mo1 ^= Do1; \
+ Bbo0 = ROL32(A##mo1, 11); \
+ E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \
+ A##su0 ^= Du0; \
+ Bbu0 = ROL32(A##su0, 7); \
+ E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \
+ E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \
+ E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \
+\
+ A##ba1 ^= Da1; \
+ Bba1 = A##ba1; \
+ A##ge1 ^= De1; \
+ Bbe1 = ROL32(A##ge1, 22); \
+ A##ki0 ^= Di0; \
+ Bbi1 = ROL32(A##ki0, 21); \
+ E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
+ A##mo0 ^= Do0; \
+ Bbo1 = ROL32(A##mo0, 10); \
+ E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \
+ A##su1 ^= Du1; \
+ Bbu1 = ROL32(A##su1, 7); \
+ E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \
+ E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \
+ E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \
+\
+ A##bo0 ^= Do0; \
+ Bga0 = ROL32(A##bo0, 14); \
+ A##gu0 ^= Du0; \
+ Bge0 = ROL32(A##gu0, 10); \
+ A##ka1 ^= Da1; \
+ Bgi0 = ROL32(A##ka1, 2); \
+ E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \
+ A##me1 ^= De1; \
+ Bgo0 = ROL32(A##me1, 23); \
+ E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \
+ A##si1 ^= Di1; \
+ Bgu0 = ROL32(A##si1, 31); \
+ E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \
+ E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \
+ E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \
+\
+ A##bo1 ^= Do1; \
+ Bga1 = ROL32(A##bo1, 14); \
+ A##gu1 ^= Du1; \
+ Bge1 = ROL32(A##gu1, 10); \
+ A##ka0 ^= Da0; \
+ Bgi1 = ROL32(A##ka0, 1); \
+ E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \
+ A##me0 ^= De0; \
+ Bgo1 = ROL32(A##me0, 22); \
+ E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \
+ A##si0 ^= Di0; \
+ Bgu1 = ROL32(A##si0, 30); \
+ E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \
+ E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \
+ E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \
+\
+ A##be1 ^= De1; \
+ Bka0 = ROL32(A##be1, 1); \
+ A##gi0 ^= Di0; \
+ Bke0 = ROL32(A##gi0, 3); \
+ A##ko1 ^= Do1; \
+ Bki0 = ROL32(A##ko1, 13); \
+ E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \
+ A##mu0 ^= Du0; \
+ Bko0 = ROL32(A##mu0, 4); \
+ E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \
+ A##sa0 ^= Da0; \
+ Bku0 = ROL32(A##sa0, 9); \
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
+ E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \
+ E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \
+\
+ A##be0 ^= De0; \
+ Bka1 = A##be0; \
+ A##gi1 ^= Di1; \
+ Bke1 = ROL32(A##gi1, 3); \
+ A##ko0 ^= Do0; \
+ Bki1 = ROL32(A##ko0, 12); \
+ E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \
+ A##mu1 ^= Du1; \
+ Bko1 = ROL32(A##mu1, 4); \
+ E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \
+ A##sa1 ^= Da1; \
+ Bku1 = ROL32(A##sa1, 9); \
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
+ E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \
+ E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \
+\
+ A##bu1 ^= Du1; \
+ Bma0 = ROL32(A##bu1, 14); \
+ A##ga0 ^= Da0; \
+ Bme0 = ROL32(A##ga0, 18); \
+ A##ke0 ^= De0; \
+ Bmi0 = ROL32(A##ke0, 5); \
+ E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \
+ A##mi1 ^= Di1; \
+ Bmo0 = ROL32(A##mi1, 8); \
+ E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \
+ A##so0 ^= Do0; \
+ Bmu0 = ROL32(A##so0, 28); \
+ E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \
+ E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \
+ E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \
+\
+ A##bu0 ^= Du0; \
+ Bma1 = ROL32(A##bu0, 13); \
+ A##ga1 ^= Da1; \
+ Bme1 = ROL32(A##ga1, 18); \
+ A##ke1 ^= De1; \
+ Bmi1 = ROL32(A##ke1, 5); \
+ E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \
+ A##mi0 ^= Di0; \
+ Bmo1 = ROL32(A##mi0, 7); \
+ E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \
+ A##so1 ^= Do1; \
+ Bmu1 = ROL32(A##so1, 28); \
+ E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \
+ E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \
+ E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \
+\
+ A##bi0 ^= Di0; \
+ Bsa0 = ROL32(A##bi0, 31); \
+ A##go1 ^= Do1; \
+ Bse0 = ROL32(A##go1, 28); \
+ A##ku1 ^= Du1; \
+ Bsi0 = ROL32(A##ku1, 20); \
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
+ A##ma1 ^= Da1; \
+ Bso0 = ROL32(A##ma1, 21); \
+ E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \
+ A##se0 ^= De0; \
+ Bsu0 = ROL32(A##se0, 1); \
+ E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \
+ E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \
+ E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \
+\
+ A##bi1 ^= Di1; \
+ Bsa1 = ROL32(A##bi1, 31); \
+ A##go0 ^= Do0; \
+ Bse1 = ROL32(A##go0, 27); \
+ A##ku0 ^= Du0; \
+ Bsi1 = ROL32(A##ku0, 19); \
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
+ A##ma0 ^= Da0; \
+ Bso1 = ROL32(A##ma0, 20); \
+ E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \
+ A##se1 ^= De1; \
+ Bsu1 = ROL32(A##se1, 1); \
+ E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \
+ E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \
+ E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \
+\
+
+#endif /* UseBebigokimisa */
+
+static const UINT32 KeccakF1600RoundConstants_int2_0[24] = { /* entry i is XORed into E##ba0 by the round macros; holds the even-position bits of the 64-bit round constant for round i (factor-2 interleaving). static: defined in an included .macros file, so it must not get external linkage */
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000000UL,
+ 0x00000001UL,
+ 0x00000000UL };
+
+static const UINT32 KeccakF1600RoundConstants_int2_1[24] = { /* entry i is XORed into E##ba1 by the round macros; holds the odd-position bits of the 64-bit round constant for round i (factor-2 interleaving). static: defined in an included .macros file, so it must not get external linkage */
+ 0x00000000UL,
+ 0x00000089UL,
+ 0x8000008bUL,
+ 0x80008080UL,
+ 0x0000008bUL,
+ 0x00008000UL,
+ 0x80008088UL,
+ 0x80000082UL,
+ 0x0000000bUL,
+ 0x0000000aUL,
+ 0x00008082UL,
+ 0x00008003UL,
+ 0x0000808bUL,
+ 0x8000000bUL,
+ 0x8000008aUL,
+ 0x80000081UL,
+ 0x80000081UL,
+ 0x80000008UL,
+ 0x00000083UL,
+ 0x80008003UL,
+ 0x80008088UL,
+ 0x80000088UL,
+ 0x00008000UL,
+ 0x80008082UL };
+
+#define copyFromStateAndXor1024bits(X, state, input) /* loads the 50 X-prefixed words from state[0..49]; elements 0..31 are XORed with input[0..31] first, elements 32..49 are copied as-is */ \
+ X##ba0 = state[ 0]^input[ 0]; \
+ X##ba1 = state[ 1]^input[ 1]; \
+ X##be0 = state[ 2]^input[ 2]; \
+ X##be1 = state[ 3]^input[ 3]; \
+ X##bi0 = state[ 4]^input[ 4]; \
+ X##bi1 = state[ 5]^input[ 5]; \
+ X##bo0 = state[ 6]^input[ 6]; \
+ X##bo1 = state[ 7]^input[ 7]; \
+ X##bu0 = state[ 8]^input[ 8]; \
+ X##bu1 = state[ 9]^input[ 9]; \
+ X##ga0 = state[10]^input[10]; \
+ X##ga1 = state[11]^input[11]; \
+ X##ge0 = state[12]^input[12]; \
+ X##ge1 = state[13]^input[13]; \
+ X##gi0 = state[14]^input[14]; \
+ X##gi1 = state[15]^input[15]; \
+ X##go0 = state[16]^input[16]; \
+ X##go1 = state[17]^input[17]; \
+ X##gu0 = state[18]^input[18]; \
+ X##gu1 = state[19]^input[19]; \
+ X##ka0 = state[20]^input[20]; \
+ X##ka1 = state[21]^input[21]; \
+ X##ke0 = state[22]^input[22]; \
+ X##ke1 = state[23]^input[23]; \
+ X##ki0 = state[24]^input[24]; \
+ X##ki1 = state[25]^input[25]; \
+ X##ko0 = state[26]^input[26]; \
+ X##ko1 = state[27]^input[27]; \
+ X##ku0 = state[28]^input[28]; \
+ X##ku1 = state[29]^input[29]; \
+ X##ma0 = state[30]^input[30]; \
+ X##ma1 = state[31]^input[31]; \
+ X##me0 = state[32]; \
+ X##me1 = state[33]; \
+ X##mi0 = state[34]; \
+ X##mi1 = state[35]; \
+ X##mo0 = state[36]; \
+ X##mo1 = state[37]; \
+ X##mu0 = state[38]; \
+ X##mu1 = state[39]; \
+ X##sa0 = state[40]; \
+ X##sa1 = state[41]; \
+ X##se0 = state[42]; \
+ X##se1 = state[43]; \
+ X##si0 = state[44]; \
+ X##si1 = state[45]; \
+ X##so0 = state[46]; \
+ X##so1 = state[47]; \
+ X##su0 = state[48]; \
+ X##su1 = state[49]; \
+
+#define copyFromStateAndXor1088bits(X, state, input) /* loads the 50 X-prefixed words from state[0..49]; elements 0..33 are XORed with input[0..33] first, elements 34..49 are copied as-is */ \
+ X##ba0 = state[ 0]^input[ 0]; \
+ X##ba1 = state[ 1]^input[ 1]; \
+ X##be0 = state[ 2]^input[ 2]; \
+ X##be1 = state[ 3]^input[ 3]; \
+ X##bi0 = state[ 4]^input[ 4]; \
+ X##bi1 = state[ 5]^input[ 5]; \
+ X##bo0 = state[ 6]^input[ 6]; \
+ X##bo1 = state[ 7]^input[ 7]; \
+ X##bu0 = state[ 8]^input[ 8]; \
+ X##bu1 = state[ 9]^input[ 9]; \
+ X##ga0 = state[10]^input[10]; \
+ X##ga1 = state[11]^input[11]; \
+ X##ge0 = state[12]^input[12]; \
+ X##ge1 = state[13]^input[13]; \
+ X##gi0 = state[14]^input[14]; \
+ X##gi1 = state[15]^input[15]; \
+ X##go0 = state[16]^input[16]; \
+ X##go1 = state[17]^input[17]; \
+ X##gu0 = state[18]^input[18]; \
+ X##gu1 = state[19]^input[19]; \
+ X##ka0 = state[20]^input[20]; \
+ X##ka1 = state[21]^input[21]; \
+ X##ke0 = state[22]^input[22]; \
+ X##ke1 = state[23]^input[23]; \
+ X##ki0 = state[24]^input[24]; \
+ X##ki1 = state[25]^input[25]; \
+ X##ko0 = state[26]^input[26]; \
+ X##ko1 = state[27]^input[27]; \
+ X##ku0 = state[28]^input[28]; \
+ X##ku1 = state[29]^input[29]; \
+ X##ma0 = state[30]^input[30]; \
+ X##ma1 = state[31]^input[31]; \
+ X##me0 = state[32]^input[32]; \
+ X##me1 = state[33]^input[33]; \
+ X##mi0 = state[34]; \
+ X##mi1 = state[35]; \
+ X##mo0 = state[36]; \
+ X##mo1 = state[37]; \
+ X##mu0 = state[38]; \
+ X##mu1 = state[39]; \
+ X##sa0 = state[40]; \
+ X##sa1 = state[41]; \
+ X##se0 = state[42]; \
+ X##se1 = state[43]; \
+ X##si0 = state[44]; \
+ X##si1 = state[45]; \
+ X##so0 = state[46]; \
+ X##so1 = state[47]; \
+ X##su0 = state[48]; \
+ X##su1 = state[49]; \
+
+#define copyFromState(X, state) /* loads the 50 X-prefixed words (ba0, ba1, be0, ... su1) from state[0..49] in order, unmodified */ \
+ X##ba0 = state[ 0]; \
+ X##ba1 = state[ 1]; \
+ X##be0 = state[ 2]; \
+ X##be1 = state[ 3]; \
+ X##bi0 = state[ 4]; \
+ X##bi1 = state[ 5]; \
+ X##bo0 = state[ 6]; \
+ X##bo1 = state[ 7]; \
+ X##bu0 = state[ 8]; \
+ X##bu1 = state[ 9]; \
+ X##ga0 = state[10]; \
+ X##ga1 = state[11]; \
+ X##ge0 = state[12]; \
+ X##ge1 = state[13]; \
+ X##gi0 = state[14]; \
+ X##gi1 = state[15]; \
+ X##go0 = state[16]; \
+ X##go1 = state[17]; \
+ X##gu0 = state[18]; \
+ X##gu1 = state[19]; \
+ X##ka0 = state[20]; \
+ X##ka1 = state[21]; \
+ X##ke0 = state[22]; \
+ X##ke1 = state[23]; \
+ X##ki0 = state[24]; \
+ X##ki1 = state[25]; \
+ X##ko0 = state[26]; \
+ X##ko1 = state[27]; \
+ X##ku0 = state[28]; \
+ X##ku1 = state[29]; \
+ X##ma0 = state[30]; \
+ X##ma1 = state[31]; \
+ X##me0 = state[32]; \
+ X##me1 = state[33]; \
+ X##mi0 = state[34]; \
+ X##mi1 = state[35]; \
+ X##mo0 = state[36]; \
+ X##mo1 = state[37]; \
+ X##mu0 = state[38]; \
+ X##mu1 = state[39]; \
+ X##sa0 = state[40]; \
+ X##sa1 = state[41]; \
+ X##se0 = state[42]; \
+ X##se1 = state[43]; \
+ X##si0 = state[44]; \
+ X##si1 = state[45]; \
+ X##so0 = state[46]; \
+ X##so1 = state[47]; \
+ X##su0 = state[48]; \
+ X##su1 = state[49]; \
+
+#define copyToState(state, X) /* stores the 50 X-prefixed words (ba0, ba1, be0, ... su1) to state[0..49] in order */ \
+ state[ 0] = X##ba0; \
+ state[ 1] = X##ba1; \
+ state[ 2] = X##be0; \
+ state[ 3] = X##be1; \
+ state[ 4] = X##bi0; \
+ state[ 5] = X##bi1; \
+ state[ 6] = X##bo0; \
+ state[ 7] = X##bo1; \
+ state[ 8] = X##bu0; \
+ state[ 9] = X##bu1; \
+ state[10] = X##ga0; \
+ state[11] = X##ga1; \
+ state[12] = X##ge0; \
+ state[13] = X##ge1; \
+ state[14] = X##gi0; \
+ state[15] = X##gi1; \
+ state[16] = X##go0; \
+ state[17] = X##go1; \
+ state[18] = X##gu0; \
+ state[19] = X##gu1; \
+ state[20] = X##ka0; \
+ state[21] = X##ka1; \
+ state[22] = X##ke0; \
+ state[23] = X##ke1; \
+ state[24] = X##ki0; \
+ state[25] = X##ki1; \
+ state[26] = X##ko0; \
+ state[27] = X##ko1; \
+ state[28] = X##ku0; \
+ state[29] = X##ku1; \
+ state[30] = X##ma0; \
+ state[31] = X##ma1; \
+ state[32] = X##me0; \
+ state[33] = X##me1; \
+ state[34] = X##mi0; \
+ state[35] = X##mi1; \
+ state[36] = X##mo0; \
+ state[37] = X##mo1; \
+ state[38] = X##mu0; \
+ state[39] = X##mu1; \
+ state[40] = X##sa0; \
+ state[41] = X##sa1; \
+ state[42] = X##se0; \
+ state[43] = X##se1; \
+ state[44] = X##si0; \
+ state[45] = X##si1; \
+ state[46] = X##so0; \
+ state[47] = X##so1; \
+ state[48] = X##su0; \
+ state[49] = X##su1; \
+
+#define copyStateVariables(X, Y) /* assigns every Y-prefixed word to the same-named X-prefixed word (50 assignments, ba0 through su1) */ \
+ X##ba0 = Y##ba0; \
+ X##ba1 = Y##ba1; \
+ X##be0 = Y##be0; \
+ X##be1 = Y##be1; \
+ X##bi0 = Y##bi0; \
+ X##bi1 = Y##bi1; \
+ X##bo0 = Y##bo0; \
+ X##bo1 = Y##bo1; \
+ X##bu0 = Y##bu0; \
+ X##bu1 = Y##bu1; \
+ X##ga0 = Y##ga0; \
+ X##ga1 = Y##ga1; \
+ X##ge0 = Y##ge0; \
+ X##ge1 = Y##ge1; \
+ X##gi0 = Y##gi0; \
+ X##gi1 = Y##gi1; \
+ X##go0 = Y##go0; \
+ X##go1 = Y##go1; \
+ X##gu0 = Y##gu0; \
+ X##gu1 = Y##gu1; \
+ X##ka0 = Y##ka0; \
+ X##ka1 = Y##ka1; \
+ X##ke0 = Y##ke0; \
+ X##ke1 = Y##ke1; \
+ X##ki0 = Y##ki0; \
+ X##ki1 = Y##ki1; \
+ X##ko0 = Y##ko0; \
+ X##ko1 = Y##ko1; \
+ X##ku0 = Y##ku0; \
+ X##ku1 = Y##ku1; \
+ X##ma0 = Y##ma0; \
+ X##ma1 = Y##ma1; \
+ X##me0 = Y##me0; \
+ X##me1 = Y##me1; \
+ X##mi0 = Y##mi0; \
+ X##mi1 = Y##mi1; \
+ X##mo0 = Y##mo0; \
+ X##mo1 = Y##mo1; \
+ X##mu0 = Y##mu0; \
+ X##mu1 = Y##mu1; \
+ X##sa0 = Y##sa0; \
+ X##sa1 = Y##sa1; \
+ X##se0 = Y##se0; \
+ X##se1 = Y##se1; \
+ X##si0 = Y##si0; \
+ X##si1 = Y##si1; \
+ X##so0 = Y##so0; \
+ X##so1 = Y##so1; \
+ X##su0 = Y##su0; \
+ X##su1 = Y##su1; \
+
diff --git a/Modules/_sha3/keccak/KeccakF-1600-32.macros b/Modules/_sha3/keccak/KeccakF-1600-32.macros
new file mode 100644
index 0000000000..9ade600067
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-32.macros
@@ -0,0 +1,26 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifdef UseSchedule /* selects which 32-bit round-macro file gets included */
+ #if (UseSchedule == 1)
+ #include "KeccakF-1600-32-s1.macros"
+ #elif (UseSchedule == 2)
+ #include "KeccakF-1600-32-s2.macros"
+ #elif (UseSchedule == 3)
+ #include "KeccakF-1600-32-rvk.macros"
+ #else /* any other value of UseSchedule is rejected at compile time */
+ #error "This schedule is not supported."
+ #endif
+#else /* UseSchedule not defined: default to the s1 macros */
+ #include "KeccakF-1600-32-s1.macros"
+#endif /* UseSchedule */
diff --git a/Modules/_sha3/keccak/KeccakF-1600-64.macros b/Modules/_sha3/keccak/KeccakF-1600-64.macros
new file mode 100644
index 0000000000..dc0f78924d
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-64.macros
@@ -0,0 +1,728 @@
+/*
+Code automatically generated by KeccakTools!
+
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define declareABCDE /* declares the UINT64 locals named by the macros in this file: A, B and E lanes (25 each) plus the five C and five D words */ \
+ UINT64 Aba, Abe, Abi, Abo, Abu; \
+ UINT64 Aga, Age, Agi, Ago, Agu; \
+ UINT64 Aka, Ake, Aki, Ako, Aku; \
+ UINT64 Ama, Ame, Ami, Amo, Amu; \
+ UINT64 Asa, Ase, Asi, Aso, Asu; \
+ UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
+ UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
+ UINT64 Bka, Bke, Bki, Bko, Bku; \
+ UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
+ UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
+ UINT64 Ca, Ce, Ci, Co, Cu; \
+ UINT64 Da, De, Di, Do, Du; \
+ UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
+ UINT64 Ega, Ege, Egi, Ego, Egu; \
+ UINT64 Eka, Eke, Eki, Eko, Eku; \
+ UINT64 Ema, Eme, Emi, Emo, Emu; \
+ UINT64 Esa, Ese, Esi, Eso, Esu; \
+
+#define prepareTheta /* sets each of Ca..Cu to the XOR of the five A lanes whose name ends in the same vowel; the A prefix is fixed here, not a macro parameter */ \
+ Ca = Aba^Aga^Aka^Ama^Asa; \
+ Ce = Abe^Age^Ake^Ame^Ase; \
+ Ci = Abi^Agi^Aki^Ami^Asi; \
+ Co = Abo^Ago^Ako^Amo^Aso; \
+ Cu = Abu^Agu^Aku^Amu^Asu; \
+
+#ifdef UseBebigokimisa
+/* --- Code for round i, with prepare-theta (lane complementing pattern 'bebigokimisa'): forms Da..Du from Ca..Cu, XORs D into the A lanes (A is modified), writes the round output to the E lanes, and folds every E lane into Ca..Cu so the C words end up holding the five XOR sums of the E lanes */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^( Bbe | Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ Ca = E##ba; \
+ E##be = Bbe ^((~Bbi)| Bbo ); \
+ Ce = E##be; \
+ E##bi = Bbi ^( Bbo & Bbu ); \
+ Ci = E##bi; \
+ E##bo = Bbo ^( Bbu | Bba ); \
+ Co = E##bo; \
+ E##bu = Bbu ^( Bba & Bbe ); \
+ Cu = E##bu; \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^( Bge | Bgi ); \
+ Ca ^= E##ga; \
+ E##ge = Bge ^( Bgi & Bgo ); \
+ Ce ^= E##ge; \
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
+ Ci ^= E##gi; \
+ E##go = Bgo ^( Bgu | Bga ); \
+ Co ^= E##go; \
+ E##gu = Bgu ^( Bga & Bge ); \
+ Cu ^= E##gu; \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^( Bke | Bki ); \
+ Ca ^= E##ka; \
+ E##ke = Bke ^( Bki & Bko ); \
+ Ce ^= E##ke; \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ Ci ^= E##ki; \
+ E##ko = (~Bko)^( Bku | Bka ); \
+ Co ^= E##ko; \
+ E##ku = Bku ^( Bka & Bke ); \
+ Cu ^= E##ku; \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^( Bme & Bmi ); \
+ Ca ^= E##ma; \
+ E##me = Bme ^( Bmi | Bmo ); \
+ Ce ^= E##me; \
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
+ Ci ^= E##mi; \
+ E##mo = (~Bmo)^( Bmu & Bma ); \
+ Co ^= E##mo; \
+ E##mu = Bmu ^( Bma | Bme ); \
+ Cu ^= E##mu; \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ Ca ^= E##sa; \
+ E##se = (~Bse)^( Bsi | Bso ); \
+ Ce ^= E##se; \
+ E##si = Bsi ^( Bso & Bsu ); \
+ Ci ^= E##si; \
+ E##so = Bso ^( Bsu | Bsa ); \
+ Co ^= E##so; \
+ E##su = Bsu ^( Bsa & Bse ); \
+ Cu ^= E##su; \
+\
+
+/* --- Code for round i (lane complementing pattern 'bebigokimisa'): forms Da..Du from Ca..Cu, XORs D into the A lanes (A is modified) and writes the round output to the E lanes; Ca..Cu are only read here, never assigned */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^( Bbe | Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ E##be = Bbe ^((~Bbi)| Bbo ); \
+ E##bi = Bbi ^( Bbo & Bbu ); \
+ E##bo = Bbo ^( Bbu | Bba ); \
+ E##bu = Bbu ^( Bba & Bbe ); \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^( Bge | Bgi ); \
+ E##ge = Bge ^( Bgi & Bgo ); \
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
+ E##go = Bgo ^( Bgu | Bga ); \
+ E##gu = Bgu ^( Bga & Bge ); \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^( Bke | Bki ); \
+ E##ke = Bke ^( Bki & Bko ); \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ E##ko = (~Bko)^( Bku | Bka ); \
+ E##ku = Bku ^( Bka & Bke ); \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^( Bme & Bmi ); \
+ E##me = Bme ^( Bmi | Bmo ); \
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
+ E##mo = (~Bmo)^( Bmu & Bma ); \
+ E##mu = Bmu ^( Bma | Bme ); \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ E##se = (~Bse)^( Bsi | Bso ); \
+ E##si = Bsi ^( Bso & Bsu ); \
+ E##so = Bso ^( Bsu | Bsa ); \
+ E##su = Bsu ^( Bsa & Bse ); \
+\
+
+#else /* UseBebigokimisa */
+/* --- Code for round i, with prepare-theta: forms Da..Du from Ca..Cu, XORs D into the A lanes (A is modified), writes the round output to the E lanes, and folds every E lane into Ca..Cu so the C words end up holding the five XOR sums of the E lanes */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^((~Bbe)& Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ Ca = E##ba; \
+ E##be = Bbe ^((~Bbi)& Bbo ); \
+ Ce = E##be; \
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
+ Ci = E##bi; \
+ E##bo = Bbo ^((~Bbu)& Bba ); \
+ Co = E##bo; \
+ E##bu = Bbu ^((~Bba)& Bbe ); \
+ Cu = E##bu; \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^((~Bge)& Bgi ); \
+ Ca ^= E##ga; \
+ E##ge = Bge ^((~Bgi)& Bgo ); \
+ Ce ^= E##ge; \
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
+ Ci ^= E##gi; \
+ E##go = Bgo ^((~Bgu)& Bga ); \
+ Co ^= E##go; \
+ E##gu = Bgu ^((~Bga)& Bge ); \
+ Cu ^= E##gu; \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^((~Bke)& Bki ); \
+ Ca ^= E##ka; \
+ E##ke = Bke ^((~Bki)& Bko ); \
+ Ce ^= E##ke; \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ Ci ^= E##ki; \
+ E##ko = Bko ^((~Bku)& Bka ); \
+ Co ^= E##ko; \
+ E##ku = Bku ^((~Bka)& Bke ); \
+ Cu ^= E##ku; \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^((~Bme)& Bmi ); \
+ Ca ^= E##ma; \
+ E##me = Bme ^((~Bmi)& Bmo ); \
+ Ce ^= E##me; \
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
+ Ci ^= E##mi; \
+ E##mo = Bmo ^((~Bmu)& Bma ); \
+ Co ^= E##mo; \
+ E##mu = Bmu ^((~Bma)& Bme ); \
+ Cu ^= E##mu; \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ Ca ^= E##sa; \
+ E##se = Bse ^((~Bsi)& Bso ); \
+ Ce ^= E##se; \
+ E##si = Bsi ^((~Bso)& Bsu ); \
+ Ci ^= E##si; \
+ E##so = Bso ^((~Bsu)& Bsa ); \
+ Co ^= E##so; \
+ E##su = Bsu ^((~Bsa)& Bse ); \
+ Cu ^= E##su; \
+\
+
+/* --- Code for round i: forms Da..Du from Ca..Cu, XORs D into the A lanes (A is modified) and writes the round output to the E lanes; Ca..Cu are only read here, never assigned */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^((~Bbe)& Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ E##be = Bbe ^((~Bbi)& Bbo ); \
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
+ E##bo = Bbo ^((~Bbu)& Bba ); \
+ E##bu = Bbu ^((~Bba)& Bbe ); \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^((~Bge)& Bgi ); \
+ E##ge = Bge ^((~Bgi)& Bgo ); \
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
+ E##go = Bgo ^((~Bgu)& Bga ); \
+ E##gu = Bgu ^((~Bga)& Bge ); \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^((~Bke)& Bki ); \
+ E##ke = Bke ^((~Bki)& Bko ); \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ E##ko = Bko ^((~Bku)& Bka ); \
+ E##ku = Bku ^((~Bka)& Bke ); \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^((~Bme)& Bmi ); \
+ E##me = Bme ^((~Bmi)& Bmo ); \
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
+ E##mo = Bmo ^((~Bmu)& Bma ); \
+ E##mu = Bmu ^((~Bma)& Bme ); \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ E##se = Bse ^((~Bsi)& Bso ); \
+ E##si = Bsi ^((~Bso)& Bsu ); \
+ E##so = Bso ^((~Bsu)& Bsa ); \
+ E##su = Bsu ^((~Bsa)& Bse ); \
+\
+
+#endif /* UseBebigokimisa */
+
+static const UINT64 KeccakF1600RoundConstants[24] = { /* indexed by the round number i; entry i is XORed into lane E##ba by the round macros */
+ 0x0000000000000001ULL,
+ 0x0000000000008082ULL,
+ 0x800000000000808aULL,
+ 0x8000000080008000ULL,
+ 0x000000000000808bULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008009ULL,
+ 0x000000000000008aULL,
+ 0x0000000000000088ULL,
+ 0x0000000080008009ULL,
+ 0x000000008000000aULL,
+ 0x000000008000808bULL,
+ 0x800000000000008bULL,
+ 0x8000000000008089ULL,
+ 0x8000000000008003ULL,
+ 0x8000000000008002ULL,
+ 0x8000000000000080ULL,
+ 0x000000000000800aULL,
+ 0x800000008000000aULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008080ULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008008ULL };
+
+#define copyFromStateAndXor576bits(X, state, input) /* loads the 25 X-prefixed lanes from state[0..24]; elements 0..8 are XORed with input[0..8] first, elements 9..24 are copied as-is */ \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]; \
+ X##ka = state[10]; \
+ X##ke = state[11]; \
+ X##ki = state[12]; \
+ X##ko = state[13]; \
+ X##ku = state[14]; \
+ X##ma = state[15]; \
+ X##me = state[16]; \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyFromStateAndXor832bits(X, state, input) /* loads the 25 X-prefixed lanes from state[0..24]; elements 0..12 are XORed with input[0..12] first, elements 13..24 are copied as-is */ \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ X##ka = state[10]^input[10]; \
+ X##ke = state[11]^input[11]; \
+ X##ki = state[12]^input[12]; \
+ X##ko = state[13]; \
+ X##ku = state[14]; \
+ X##ma = state[15]; \
+ X##me = state[16]; \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyFromStateAndXor1024bits(X, state, input) /* loads the 25 X-prefixed lanes from state[0..24]; elements 0..15 are XORed with input[0..15] first, elements 16..24 are copied as-is */ \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ X##ka = state[10]^input[10]; \
+ X##ke = state[11]^input[11]; \
+ X##ki = state[12]^input[12]; \
+ X##ko = state[13]^input[13]; \
+ X##ku = state[14]^input[14]; \
+ X##ma = state[15]^input[15]; \
+ X##me = state[16]; \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyFromStateAndXor1088bits(X, state, input) /* loads the 25 X-prefixed lanes from state[0..24]; elements 0..16 are XORed with input[0..16] first, elements 17..24 are copied as-is */ \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ X##ka = state[10]^input[10]; \
+ X##ke = state[11]^input[11]; \
+ X##ki = state[12]^input[12]; \
+ X##ko = state[13]^input[13]; \
+ X##ku = state[14]^input[14]; \
+ X##ma = state[15]^input[15]; \
+ X##me = state[16]^input[16]; \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyFromStateAndXor1152bits(X, state, input) /* loads the 25 X-prefixed lanes from state[0..24]; elements 0..17 are XORed with input[0..17] first, elements 18..24 are copied as-is */ \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ X##ka = state[10]^input[10]; \
+ X##ke = state[11]^input[11]; \
+ X##ki = state[12]^input[12]; \
+ X##ko = state[13]^input[13]; \
+ X##ku = state[14]^input[14]; \
+ X##ma = state[15]^input[15]; \
+ X##me = state[16]^input[16]; \
+ X##mi = state[17]^input[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyFromStateAndXor1344bits(X, state, input) /* loads the 25 X-prefixed lanes from state[0..24]; elements 0..20 are XORed with input[0..20] first, elements 21..24 are copied as-is */ \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ X##ka = state[10]^input[10]; \
+ X##ke = state[11]^input[11]; \
+ X##ki = state[12]^input[12]; \
+ X##ko = state[13]^input[13]; \
+ X##ku = state[14]^input[14]; \
+ X##ma = state[15]^input[15]; \
+ X##me = state[16]^input[16]; \
+ X##mi = state[17]^input[17]; \
+ X##mo = state[18]^input[18]; \
+ X##mu = state[19]^input[19]; \
+ X##sa = state[20]^input[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyFromState(X, state) /* loads the 25 X-prefixed lanes (ba, be, bi, ... su) from state[0..24] in order, unmodified */ \
+ X##ba = state[ 0]; \
+ X##be = state[ 1]; \
+ X##bi = state[ 2]; \
+ X##bo = state[ 3]; \
+ X##bu = state[ 4]; \
+ X##ga = state[ 5]; \
+ X##ge = state[ 6]; \
+ X##gi = state[ 7]; \
+ X##go = state[ 8]; \
+ X##gu = state[ 9]; \
+ X##ka = state[10]; \
+ X##ke = state[11]; \
+ X##ki = state[12]; \
+ X##ko = state[13]; \
+ X##ku = state[14]; \
+ X##ma = state[15]; \
+ X##me = state[16]; \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyToState(state, X) /* stores the 25 X-prefixed lanes (ba, be, bi, ... su) to state[0..24] in order */ \
+ state[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ state[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ state[ 7] = X##gi; \
+ state[ 8] = X##go; \
+ state[ 9] = X##gu; \
+ state[10] = X##ka; \
+ state[11] = X##ke; \
+ state[12] = X##ki; \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ state[15] = X##ma; \
+ state[16] = X##me; \
+ state[17] = X##mi; \
+ state[18] = X##mo; \
+ state[19] = X##mu; \
+ state[20] = X##sa; \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ state[23] = X##so; \
+ state[24] = X##su; \
+
+#define copyStateVariables(X, Y) /* assigns every Y-prefixed lane to the same-named X-prefixed lane (25 assignments, ba through su) */ \
+ X##ba = Y##ba; \
+ X##be = Y##be; \
+ X##bi = Y##bi; \
+ X##bo = Y##bo; \
+ X##bu = Y##bu; \
+ X##ga = Y##ga; \
+ X##ge = Y##ge; \
+ X##gi = Y##gi; \
+ X##go = Y##go; \
+ X##gu = Y##gu; \
+ X##ka = Y##ka; \
+ X##ke = Y##ke; \
+ X##ki = Y##ki; \
+ X##ko = Y##ko; \
+ X##ku = Y##ku; \
+ X##ma = Y##ma; \
+ X##me = Y##me; \
+ X##mi = Y##mi; \
+ X##mo = Y##mo; \
+ X##mu = Y##mu; \
+ X##sa = Y##sa; \
+ X##se = Y##se; \
+ X##si = Y##si; \
+ X##so = Y##so; \
+ X##su = Y##su; \
+
diff --git a/Modules/_sha3/keccak/KeccakF-1600-int-set.h b/Modules/_sha3/keccak/KeccakF-1600-int-set.h
new file mode 100644
index 0000000000..0ed1d802e3
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-int-set.h
@@ -0,0 +1,6 @@
+#define ProvideFast576
+#define ProvideFast832
+#define ProvideFast1024
+#define ProvideFast1088
+#define ProvideFast1152
+#define ProvideFast1344
diff --git a/Modules/_sha3/keccak/KeccakF-1600-interface.h b/Modules/_sha3/keccak/KeccakF-1600-interface.h
new file mode 100644
index 0000000000..ce2710eeb2
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-interface.h
@@ -0,0 +1,46 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakPermutationInterface_h_
+#define _KeccakPermutationInterface_h_
+
+#include "KeccakF-1600-int-set.h"
+
+static void KeccakInitialize( void );
+static void KeccakInitializeState(unsigned char *state);
+static void KeccakPermutation(unsigned char *state);
+#ifdef ProvideFast576
+static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data);
+#endif
+#ifdef ProvideFast832
+static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data);
+#endif
+#ifdef ProvideFast1024
+static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data);
+#endif
+#ifdef ProvideFast1088
+static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data);
+#endif
+#ifdef ProvideFast1152
+static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data);
+#endif
+#ifdef ProvideFast1344
+static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data);
+#endif
+static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount);
+#ifdef ProvideFast1024
+static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data);
+#endif
+static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount);
+
+#endif
diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h b/Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h
new file mode 100644
index 0000000000..615c78217e
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h
@@ -0,0 +1,6 @@
+/*
+#define Unrolling 2
+#define UseBebigokimisa
+#define UseInterleaveTables
+#define UseSchedule 3
+*/
diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt32.c b/Modules/_sha3/keccak/KeccakF-1600-opt32.c
new file mode 100644
index 0000000000..dba6d59f13
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-opt32.c
@@ -0,0 +1,524 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+/* #include "brg_endian.h" */
+#include "KeccakF-1600-opt32-settings.h"
+#include "KeccakF-1600-interface.h"
+
+typedef unsigned char UINT8;
+typedef unsigned short UINT16;
+typedef unsigned int UINT32;
+/* typedef unsigned long long int UINT64; */
+
+#ifdef UseInterleaveTables
+static int interleaveTablesBuilt = 0;
+static UINT16 interleaveTable[65536];
+static UINT16 deinterleaveTable[65536];
+
+/*
+ * Fill interleaveTable[] and deinterleaveTable[] the first time it is called.
+ *
+ * For every 16-bit value i, bit j of i is moved to bit position
+ * j/2 + 8*(j%2): even-numbered bits end up in the low byte, odd-numbered
+ * bits in the high byte.  deinterleaveTable[] stores the inverse mapping.
+ * Once interleaveTablesBuilt is set, later calls return without work.
+ *
+ * Declared with (void) so the definition also serves as a prototype.
+ */
+static void buildInterleaveTables(void)
+{
+    UINT32 i, j;
+    UINT16 x;
+
+    if (!interleaveTablesBuilt) {
+        for(i=0; i<65536; i++) {
+            x = 0;
+            for(j=0; j<16; j++) {
+                if (i & (1 << j))
+                    x |= (1 << (j/2 + 8*(j%2)));
+            }
+            interleaveTable[i] = x;
+            deinterleaveTable[x] = (UINT16)i;
+        }
+        interleaveTablesBuilt = 1;
+    }
+}
+
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+
+#define xor2bytesIntoInterleavedWords(even, odd, source, j) \
+ i##j = interleaveTable[((const UINT16*)source)[j]]; \
+ ((UINT8*)even)[j] ^= i##j & 0xFF; \
+ ((UINT8*)odd)[j] ^= i##j >> 8;
+
+#define setInterleavedWordsInto2bytes(dest, even, odd, j) \
+ d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
+ ((UINT16*)dest)[j] = d##j;
+
+#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */
+
+#define xor2bytesIntoInterleavedWords(even, odd, source, j) \
+ i##j = interleaveTable[source[2*j] ^ ((UINT16)source[2*j+1] << 8)]; \
+ *even ^= (i##j & 0xFF) << (j*8); \
+ *odd ^= ((i##j >> 8) & 0xFF) << (j*8);
+
+#define setInterleavedWordsInto2bytes(dest, even, odd, j) \
+ d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
+ dest[2*j] = d##j & 0xFF; \
+ dest[2*j+1] = d##j >> 8;
+
+#endif /* Endianness */
+/*
+ * Table-based variant: XOR the 8 bytes at source into the word pair
+ * *even / *odd.
+ *
+ * The xor2bytesIntoInterleavedWords macro is expanded once per 16-bit
+ * group (j = 0..3).  Each expansion assigns the temporary i<j>, which is
+ * why i0..i3 are declared here, and each expansion already ends with a
+ * semicolon, so the invocations below carry none.
+ */
+static void xor8bytesIntoInterleavedWords(UINT32 *even, UINT32 *odd, const UINT8* source)
+{
+ UINT16 i0, i1, i2, i3;
+
+ xor2bytesIntoInterleavedWords(even, odd, source, 0)
+ xor2bytesIntoInterleavedWords(even, odd, source, 1)
+ xor2bytesIntoInterleavedWords(even, odd, source, 2)
+ xor2bytesIntoInterleavedWords(even, odd, source, 3)
+}
+
+#define xorLanesIntoState(laneCount, state, input) \
+ { \
+ int i; \
+ for(i=0; i<(laneCount); i++) \
+ xor8bytesIntoInterleavedWords(state+i*2, state+i*2+1, input+i*8); \
+ }
+/*
+ * Table-based variant: write 8 bytes to dest from the word pair even / odd.
+ *
+ * For j = 0..3 the setInterleavedWordsInto2bytes macro combines byte j of
+ * even (low half of the index) with byte j of odd (high half), looks the
+ * result up in deinterleaveTable[] and stores the resulting 16 bits as
+ * output bytes 2j and 2j+1.  The temporaries d0..d3 are assigned by the
+ * macro expansions, which already end with a semicolon.
+ */
+static void setInterleavedWordsInto8bytes(UINT8* dest, UINT32 even, UINT32 odd)
+{
+ UINT16 d0, d1, d2, d3;
+
+ setInterleavedWordsInto2bytes(dest, even, odd, 0)
+ setInterleavedWordsInto2bytes(dest, even, odd, 1)
+ setInterleavedWordsInto2bytes(dest, even, odd, 2)
+ setInterleavedWordsInto2bytes(dest, even, odd, 3)
+}
+
+#define extractLanes(laneCount, state, data) \
+ { \
+ int i; \
+ for(i=0; i<(laneCount); i++) \
+ setInterleavedWordsInto8bytes(data+i*8, ((UINT32*)state)[i*2], ((UINT32*)state)[i*2+1]); \
+ }
+
+#else /* No interleaving tables */
+
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+
+/* xorInterleavedLE(rateInLanes, state, input): for each of rateInLanes
+ * lanes, read two 32-bit words (x0, x1) from input, permute the bits of
+ * each word with four masked swap steps (shift distances 1, 2, 4, 8), then
+ * XOR two words into state: the first combines the low 16 bits of x0 with
+ * the low 16 bits of x1 (moved to the upper half), the second combines the
+ * high 16 bits of x0 with the high 16 bits of x1.
+ * input is read through a const UINT32 pointer.
+ * NOTE(review): that cast presumably requires input to be suitably aligned
+ * for 32-bit loads -- confirm against the callers.
+ * Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
+#define xorInterleavedLE(rateInLanes, state, input) \
+ { \
+ const UINT32 * pI = (const UINT32 *)input; \
+ UINT32 * pS = state; \
+ UINT32 t, x0, x1; \
+ int i; \
+ for (i = (rateInLanes)-1; i >= 0; --i) \
+ { \
+ x0 = *(pI++); \
+ t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); \
+ t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); \
+ t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); \
+ t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); \
+ x1 = *(pI++); \
+ t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); \
+ t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); \
+ t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); \
+ t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); \
+ *(pS++) ^= (UINT16)x0 | (x1 << 16); \
+ *(pS++) ^= (x0 >> 16) | (x1 & 0xFFFF0000); \
+ } \
+ }
+
+#define xorLanesIntoState(laneCount, state, input) \
+ xorInterleavedLE(laneCount, state, input)
+
+#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */
+
+/*
+ * Permute the bits of a 64-bit lane into the interleaved layout using five
+ * masked swap steps (shift distances 1, 2, 4, 8, 16).  The caller in this
+ * file XORs the low 32 bits of the result into evenAndOdd[0] and the high
+ * 32 bits into evenAndOdd[1].
+ *
+ * Declared static, like every other function in this file, so the helper
+ * does not leak into the global namespace.
+ *
+ * Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
+ */
+static UINT64 toInterleaving(UINT64 x)
+{
+    UINT64 t;
+
+    t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1);
+    t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2);
+    t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4);
+    t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8);
+    t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16);
+
+    return x;
+}
+/*
+ * Big-endian, table-free variant: assemble a 64-bit word from the 8 bytes
+ * at source (source[0] is the least significant byte), convert it with
+ * toInterleaving(), then XOR the low 32 bits into evenAndOdd[0] and the
+ * high 32 bits into evenAndOdd[1].
+ */
+static void xor8bytesIntoInterleavedWords(UINT32* evenAndOdd, const UINT8* source)
+{
+ /* This can be optimized */
+ UINT64 sourceWord =
+ (UINT64)source[0]
+ ^ (((UINT64)source[1]) << 8)
+ ^ (((UINT64)source[2]) << 16)
+ ^ (((UINT64)source[3]) << 24)
+ ^ (((UINT64)source[4]) << 32)
+ ^ (((UINT64)source[5]) << 40)
+ ^ (((UINT64)source[6]) << 48)
+ ^ (((UINT64)source[7]) << 56);
+ UINT64 evenAndOddWord = toInterleaving(sourceWord);
+ evenAndOdd[0] ^= (UINT32)evenAndOddWord;
+ evenAndOdd[1] ^= (UINT32)(evenAndOddWord >> 32);
+}
+
+#define xorLanesIntoState(laneCount, state, input) \
+ { \
+ int i; \
+ for(i=0; i<(laneCount); i++) \
+ xor8bytesIntoInterleavedWords(state+i*2, input+i*8); \
+ }
+
+#endif /* Endianness */
+
+/*
+ * Inverse of the interleaving permutation: applies the same five masked
+ * swap steps as toInterleaving() in the opposite order (shift distances
+ * 16, 8, 4, 2, 1).
+ *
+ * Declared static, like every other function in this file, so the helper
+ * does not leak into the global namespace.
+ *
+ * Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
+ */
+static UINT64 fromInterleaving(UINT64 x)
+{
+    UINT64 t;
+
+    t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16);
+    t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8);
+    t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4);
+    t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2);
+    t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1);
+
+    return x;
+}
+
+/*
+ * Table-free variant: convert one lane from its interleaved word pair back
+ * to 8 output bytes.
+ *
+ * dest        receives 8 bytes; no particular alignment is required.
+ * evenAndOdd  points at two 32-bit words; [0] supplies the low half and
+ *             [1] the high half of the value given to fromInterleaving().
+ */
+static void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    /*
+     * Use memcpy rather than casting the pointers to UINT64*: dest is a
+     * byte pointer that may be misaligned for a 64-bit store, and reading
+     * UINT32 storage through a UINT64 lvalue breaks the aliasing rules.
+     * On a little-endian host the copied bytes are the same ones the
+     * former pointer casts accessed.
+     */
+    UINT64 evenAndOddWord;
+    UINT64 destWord;
+
+    memcpy(&evenAndOddWord, evenAndOdd, sizeof(evenAndOddWord));
+    destWord = fromInterleaving(evenAndOddWord);
+    memcpy(dest, &destWord, sizeof(destWord));
+#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */
+    /* This can be optimized */
+    UINT64 evenAndOddWord = (UINT64)evenAndOdd[0] ^ ((UINT64)evenAndOdd[1] << 32);
+    UINT64 destWord = fromInterleaving(evenAndOddWord);
+    dest[0] = destWord & 0xFF;
+    dest[1] = (destWord >> 8) & 0xFF;
+    dest[2] = (destWord >> 16) & 0xFF;
+    dest[3] = (destWord >> 24) & 0xFF;
+    dest[4] = (destWord >> 32) & 0xFF;
+    dest[5] = (destWord >> 40) & 0xFF;
+    dest[6] = (destWord >> 48) & 0xFF;
+    dest[7] = (destWord >> 56) & 0xFF;
+#endif /* Endianness */
+}
+
+#define extractLanes(laneCount, state, data) \
+ { \
+ int i; \
+ for(i=0; i<(laneCount); i++) \
+ setInterleavedWordsInto8bytes(data+i*8, (UINT32*)state+i*2); \
+ }
+
+#endif /* With or without interleaving tables */
+
+/* ROL32(a, offset): rotate the 32-bit value a left by offset bits. */
+#if defined(_MSC_VER)
+#define ROL32(a, offset) _rotl(a, offset)
+#elif (defined (__arm__) && defined(__ARMCC_VERSION))
+#define ROL32(a, offset) __ror(a, 32-(offset))
+#else
+/*
+ * Portable fallback.  Both arguments are parenthesized so that expression
+ * arguments expand as intended.  offset must be in 1..31 here: an offset
+ * of 0 would shift right by 32, which is undefined for a 32-bit operand.
+ */
+#define ROL32(a, offset) ((((UINT32)(a)) << (offset)) ^ (((UINT32)(a)) >> (32-(offset))))
+#endif
+
+#include "KeccakF-1600-unrolling.macros"
+#include "KeccakF-1600-32.macros"
+
+#if (UseSchedule == 3)
+
+#ifdef UseBebigokimisa
+#error "No lane complementing with schedule 3."
+#endif
+
+#if (Unrolling != 2)
+#error "Only unrolling 2 is supported by schedule 3."
+#endif
+
+static void KeccakPermutationOnWords(UINT32 *state)
+{
+ rounds
+}
+
+static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
+{
+ xorLanesIntoState(laneCount, state, input)
+ rounds
+}
+
+#ifdef ProvideFast576
+static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
+{
+ xorLanesIntoState(9, state, input)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast832
+static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
+{
+ xorLanesIntoState(13, state, input)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1024
+static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
+{
+ xorLanesIntoState(16, state, input)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1088
+static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
+{
+ xorLanesIntoState(17, state, input)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1152
+static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
+{
+ xorLanesIntoState(18, state, input)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1344
+static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
+{
+ xorLanesIntoState(21, state, input)
+ rounds
+}
+#endif
+
+#else /* (Schedule != 3) */
+
+static void KeccakPermutationOnWords(UINT32 *state)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromState(A, state)
+ rounds
+}
+
+static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
+{
+ declareABCDE
+ unsigned int i;
+
+ xorLanesIntoState(laneCount, state, input)
+ copyFromState(A, state)
+ rounds
+}
+
+#ifdef ProvideFast576
+static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
+{
+ declareABCDE
+ unsigned int i;
+
+ xorLanesIntoState(9, state, input)
+ copyFromState(A, state)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast832
+static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
+{
+ declareABCDE
+ unsigned int i;
+
+ xorLanesIntoState(13, state, input)
+ copyFromState(A, state)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1024
+static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
+{
+ declareABCDE
+ unsigned int i;
+
+ xorLanesIntoState(16, state, input)
+ copyFromState(A, state)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1088
+static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
+{
+ declareABCDE
+ unsigned int i;
+
+ xorLanesIntoState(17, state, input)
+ copyFromState(A, state)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1152
+static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
+{
+ declareABCDE
+ unsigned int i;
+
+ xorLanesIntoState(18, state, input)
+ copyFromState(A, state)
+ rounds
+}
+#endif
+
+#ifdef ProvideFast1344
+static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
+{
+ declareABCDE
+ unsigned int i;
+
+ xorLanesIntoState(21, state, input)
+ copyFromState(A, state)
+ rounds
+}
+#endif
+
+#endif
+
+/*
+ * One-time setup hook: builds the interleave lookup tables when
+ * UseInterleaveTables is defined, and does nothing otherwise.
+ * Defined with (void) to match the prototype in KeccakF-1600-interface.h.
+ */
+static void KeccakInitialize(void)
+{
+#ifdef UseInterleaveTables
+    buildInterleaveTables();
+#endif
+}
+/*
+ * Reset the 200-byte state to all zero bits.
+ *
+ * With UseBebigokimisa (lane complementing), the 32-bit words 2, 3, 4, 5,
+ * 16, 17, 24, 25, 34, 35, 40 and 41 are then set to all ones.  With two
+ * words per lane these are both words of lanes 1, 2, 8, 12, 17 and 20,
+ * the same lanes that KeccakExtract() complements on output.
+ */
+static void KeccakInitializeState(unsigned char *state)
+{
+ memset(state, 0, 200);
+#ifdef UseBebigokimisa
+ ((UINT32*)state)[ 2] = ~(UINT32)0;
+ ((UINT32*)state)[ 3] = ~(UINT32)0;
+ ((UINT32*)state)[ 4] = ~(UINT32)0;
+ ((UINT32*)state)[ 5] = ~(UINT32)0;
+ ((UINT32*)state)[16] = ~(UINT32)0;
+ ((UINT32*)state)[17] = ~(UINT32)0;
+ ((UINT32*)state)[24] = ~(UINT32)0;
+ ((UINT32*)state)[25] = ~(UINT32)0;
+ ((UINT32*)state)[34] = ~(UINT32)0;
+ ((UINT32*)state)[35] = ~(UINT32)0;
+ ((UINT32*)state)[40] = ~(UINT32)0;
+ ((UINT32*)state)[41] = ~(UINT32)0;
+#endif
+}
+/*
+ * Byte-pointer entry point declared in KeccakF-1600-interface.h:
+ * reinterprets state as an array of UINT32 and hands it to
+ * KeccakPermutationOnWords().
+ */
+static void KeccakPermutation(unsigned char *state)
+{
+ /* We assume the state is always stored as interleaved 32-bit words */
+ KeccakPermutationOnWords((UINT32*)state);
+}
+
+#ifdef ProvideFast576
+static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
+{
+ KeccakPermutationOnWordsAfterXoring576bits((UINT32*)state, data);
+}
+#endif
+
+#ifdef ProvideFast832
+static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
+{
+ KeccakPermutationOnWordsAfterXoring832bits((UINT32*)state, data);
+}
+#endif
+
+#ifdef ProvideFast1024
+static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
+{
+ KeccakPermutationOnWordsAfterXoring1024bits((UINT32*)state, data);
+}
+#endif
+
+#ifdef ProvideFast1088
+static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
+{
+ KeccakPermutationOnWordsAfterXoring1088bits((UINT32*)state, data);
+}
+#endif
+
+#ifdef ProvideFast1152
+static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
+{
+ KeccakPermutationOnWordsAfterXoring1152bits((UINT32*)state, data);
+}
+#endif
+
+#ifdef ProvideFast1344
+static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
+{
+ KeccakPermutationOnWordsAfterXoring1344bits((UINT32*)state, data);
+}
+#endif
+/*
+ * Absorb entry point for an arbitrary lane count: forwards to
+ * KeccakPermutationOnWordsAfterXoring(), which XORs laneCount lanes of
+ * data into state (via xorLanesIntoState) before running the rounds.
+ */
+static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
+{
+ KeccakPermutationOnWordsAfterXoring((UINT32*)state, data, laneCount);
+}
+
+#ifdef ProvideFast1024
+static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
+{
+ extractLanes(16, state, data)
+#ifdef UseBebigokimisa
+ ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2];
+ ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3];
+ ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4];
+ ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5];
+ ((UINT32*)data)[16] = ~((UINT32*)data)[16];
+ ((UINT32*)data)[17] = ~((UINT32*)data)[17];
+ ((UINT32*)data)[24] = ~((UINT32*)data)[24];
+ ((UINT32*)data)[25] = ~((UINT32*)data)[25];
+#endif
+}
+#endif
+/*
+ * Write the first laneCount lanes of state to data using extractLanes
+ * (8 output bytes per lane).
+ *
+ * With UseBebigokimisa, the output words that belong to lanes 1, 2, 8, 12,
+ * 17 and 20 (two UINT32 words each) are complemented afterwards.  Each
+ * pair is touched only when laneCount is large enough for that lane to
+ * have been written; the checks are nested because the thresholds
+ * increase monotonically.
+ */
+static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
+{
+ extractLanes(laneCount, state, data)
+#ifdef UseBebigokimisa
+ if (laneCount > 1) {
+ ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2];
+ ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3];
+ if (laneCount > 2) {
+ ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4];
+ ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5];
+ if (laneCount > 8) {
+ ((UINT32*)data)[16] = ~((UINT32*)data)[16];
+ ((UINT32*)data)[17] = ~((UINT32*)data)[17];
+ if (laneCount > 12) {
+ ((UINT32*)data)[24] = ~((UINT32*)data)[24];
+ ((UINT32*)data)[25] = ~((UINT32*)data)[25];
+ if (laneCount > 17) {
+ ((UINT32*)data)[34] = ~((UINT32*)data)[34];
+ ((UINT32*)data)[35] = ~((UINT32*)data)[35];
+ if (laneCount > 20) {
+ ((UINT32*)data)[40] = ~((UINT32*)data)[40];
+ ((UINT32*)data)[41] = ~((UINT32*)data)[41];
+ }
+ }
+ }
+ }
+ }
+ }
+#endif
+}
diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h b/Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h
new file mode 100644
index 0000000000..df83e6331f
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h
@@ -0,0 +1,9 @@
+/*
+#define Unrolling 24
+#define UseBebigokimisa
+#define UseSSE
+#define UseOnlySIMD64
+#define UseMMX
+#define UseSHLD
+#define UseXOP
+*/
diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt64.c b/Modules/_sha3/keccak/KeccakF-1600-opt64.c
new file mode 100644
index 0000000000..f19b18b36a
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-opt64.c
@@ -0,0 +1,510 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+/* #include "brg_endian.h" */
+#include "KeccakF-1600-opt64-settings.h"
+#include "KeccakF-1600-interface.h"
+
+typedef unsigned char UINT8;
+/* typedef unsigned long long int UINT64; */
+
+#if defined(__GNUC__)
+#define ALIGN __attribute__ ((aligned(32)))
+#elif defined(_MSC_VER)
+#define ALIGN __declspec(align(32))
+#else
+#define ALIGN
+#endif
+
+#if defined(UseSSE)
+ #include <x86intrin.h>
+ typedef __m128i V64;
+ typedef __m128i V128;
+ typedef union {
+ V128 v128;
+ UINT64 v64[2];
+ } V6464;
+
+ #define ANDnu64(a, b) _mm_andnot_si128(a, b)
+ #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
+ #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
+ #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
+ #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
+ #define XOR64(a, b) _mm_xor_si128(a, b)
+ #define XOReq64(a, b) a = _mm_xor_si128(a, b)
+ #define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
+
+ #define ANDnu128(a, b) _mm_andnot_si128(a, b)
+ #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
+ #define CONST128(a) _mm_load_si128((const V128 *)&(a))
+ #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
+ #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
+ #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
+ #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
+ #define XOR128(a, b) _mm_xor_si128(a, b)
+ #define XOReq128(a, b) a = _mm_xor_si128(a, b)
+ #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
+ #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
+ #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
+ #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
+ #define ZERO128() _mm_setzero_si128()
+
+ #ifdef UseOnlySIMD64
+ #include "KeccakF-1600-simd64.macros"
+ #else
+ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
+ #include "KeccakF-1600-simd128.macros"
+ #endif
+
+ #ifdef UseBebigokimisa
+ #error "UseBebigokimisa cannot be used in combination with UseSSE"
+ #endif
+#elif defined(UseXOP)
+ #include <x86intrin.h>
+ typedef __m128i V64;
+ typedef __m128i V128;
+
+ #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
+ #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
+ #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
+ #define XOR64(a, b) _mm_xor_si128(a, b)
+ #define XOReq64(a, b) a = _mm_xor_si128(a, b)
+
+ #define ANDnu128(a, b) _mm_andnot_si128(a, b)
+ #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
+ #define CONST128(a) _mm_load_si128((const V128 *)&(a))
+ #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
+ #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
+ #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
+ #define XOR128(a, b) _mm_xor_si128(a, b)
+ #define XOReq128(a, b) a = _mm_xor_si128(a, b)
+ #define ZERO128() _mm_setzero_si128()
+
+ #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
+ #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
+ #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
+ #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
+ #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
+ #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
+ #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
+
+ #define ROL6464same(a, o) _mm_roti_epi64(a, o)
+ #define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
+ALIGN const UINT64 rot_0_20[2] = { 0, 20};
+ALIGN const UINT64 rot_44_3[2] = {44, 3};
+ALIGN const UINT64 rot_43_45[2] = {43, 45};
+ALIGN const UINT64 rot_21_61[2] = {21, 61};
+ALIGN const UINT64 rot_14_28[2] = {14, 28};
+ALIGN const UINT64 rot_1_36[2] = { 1, 36};
+ALIGN const UINT64 rot_6_10[2] = { 6, 10};
+ALIGN const UINT64 rot_25_15[2] = {25, 15};
+ALIGN const UINT64 rot_8_56[2] = { 8, 56};
+ALIGN const UINT64 rot_18_27[2] = {18, 27};
+ALIGN const UINT64 rot_62_55[2] = {62, 55};
+ALIGN const UINT64 rot_39_41[2] = {39, 41};
+
+#if defined(UseSimulatedXOP)
+ /* For debugging purposes, when XOP is not available */
+ #undef ROL6464
+ #undef ROL6464same
+ #define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
+ /*
+  * Emulates the two-rotation XOP form: the low 64-bit half of a is rotated
+  * left by r0 and the high half by r1.
+  * Uses ROL6464same for the per-half rotation because no ROL64 macro is
+  * defined in the UseXOP branch.
+  */
+ V128 ROL6464(V128 a, int r0, int r1)
+ {
+ V128 a0 = ROL6464same(a, r0);
+ V128 a1 = COPY64HI2LO(ROL6464same(a, r1));
+ return GET64LOLO(a0, a1);
+ }
+#endif
+
+ #include "KeccakF-1600-xop.macros"
+
+ #ifdef UseBebigokimisa
+ #error "UseBebigokimisa cannot be used in combination with UseXOP"
+ #endif
+#elif defined(UseMMX)
+ #include <mmintrin.h>
+ typedef __m64 V64;
+ #define ANDnu64(a, b) _mm_andnot_si64(a, b)
+
+ #if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
+ #define LOAD64(a) *(V64*)&(a)
+ #define CONST64(a) *(V64*)&(a)
+ #define STORE64(a, b) *(V64*)&(a) = b
+ #else
+ #define LOAD64(a) (V64)a
+ #define CONST64(a) (V64)a
+ #define STORE64(a, b) a = (UINT64)b
+ #endif
+ #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
+ #define XOR64(a, b) _mm_xor_si64(a, b)
+ #define XOReq64(a, b) a = _mm_xor_si64(a, b)
+
+ #include "KeccakF-1600-simd64.macros"
+
+ #ifdef UseBebigokimisa
+ #error "UseBebigokimisa cannot be used in combination with UseMMX"
+ #endif
+#else
+ /* ROL64(a, offset): rotate the 64-bit value a left by offset bits. */
+ #if defined(_MSC_VER)
+ #define ROL64(a, offset) _rotl64(a, offset)
+ #elif defined(UseSHLD)
+ #define ROL64(x,N) ({ \
+ register UINT64 __out; \
+ register UINT64 __in = x; \
+ __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
+ __out; \
+ })
+ #else
+ /*
+  * Portable fallback.  Arguments are parenthesized so that an expression
+  * such as ROL64(x, 1+2) expands to shifts by 3 and 64-3, not 64-1+2.
+  * offset must be in 1..63: 0 would shift right by 64, which is undefined.
+  */
+ #define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
+ #endif
+
+ #include "KeccakF-1600-64.macros"
+#endif
+
+#include "KeccakF-1600-unrolling.macros"
+/*
+ * Run the permutation on a state held as 64-bit words.
+ *
+ * declareABCDE declares the working variables and copyFromState(A, state)
+ * loads the state into the A-variables.  The `rounds` macro comes from
+ * KeccakF-1600-unrolling.macros (not shown here); presumably it performs
+ * the rounds and stores the result back into state -- confirm there.
+ * The loop counter i is declared only when Unrolling != 24.
+ * With UseMMX, _mm_empty() is called before returning.
+ */
+static void KeccakPermutationOnWords(UINT64 *state)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromState(A, state)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+/*
+ * XOR the first laneCount 64-bit words of input into state, then run the
+ * same load-and-rounds sequence as KeccakPermutationOnWords().
+ *
+ * This is the generic path; the fixed-rate variants below use
+ * copyFromStateAndXor<N>bits macros instead of the j loop.
+ * With UseMMX, _mm_empty() is called before returning.
+ */
+static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+ unsigned int j;
+
+ for(j=0; j<laneCount; j++)
+ state[j] ^= input[j];
+ copyFromState(A, state)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+
+#ifdef ProvideFast576
+static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromStateAndXor576bits(A, state, input)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+#endif
+
+#ifdef ProvideFast832
+static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromStateAndXor832bits(A, state, input)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+#endif
+
+#ifdef ProvideFast1024
+static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromStateAndXor1024bits(A, state, input)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+#endif
+
+#ifdef ProvideFast1088
+static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromStateAndXor1088bits(A, state, input)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+#endif
+
+#ifdef ProvideFast1152
+static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromStateAndXor1152bits(A, state, input)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+#endif
+
+#ifdef ProvideFast1344
+static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
+{
+ declareABCDE
+#if (Unrolling != 24)
+ unsigned int i;
+#endif
+
+ copyFromStateAndXor1344bits(A, state, input)
+ rounds
+#if defined(UseMMX)
+ _mm_empty();
+#endif
+}
+#endif
+
+/*
+ * One-time setup hook required by KeccakF-1600-interface.h; the 64-bit
+ * implementation needs no setup, so the body is empty.
+ * Defined with (void) to match the prototype in that header.
+ */
+static void KeccakInitialize(void)
+{
+}
+/*
+ * Reset the 200-byte state to all zero bits.
+ *
+ * With UseBebigokimisa, the 64-bit words 1, 2, 8, 12, 17 and 20 are then
+ * set to all ones; these are the same word indices that KeccakExtract()
+ * complements on output.
+ */
+static void KeccakInitializeState(unsigned char *state)
+{
+ memset(state, 0, 200);
+#ifdef UseBebigokimisa
+ ((UINT64*)state)[ 1] = ~(UINT64)0;
+ ((UINT64*)state)[ 2] = ~(UINT64)0;
+ ((UINT64*)state)[ 8] = ~(UINT64)0;
+ ((UINT64*)state)[12] = ~(UINT64)0;
+ ((UINT64*)state)[17] = ~(UINT64)0;
+ ((UINT64*)state)[20] = ~(UINT64)0;
+#endif
+}
+
+static void KeccakPermutation(unsigned char *state)
+{
+ /* We assume the state is always stored as words */
+ KeccakPermutationOnWords((UINT64*)state);
+}
+
+#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
+/*
+ * Assemble the eight bytes at `bytes` into *word, with bytes[0] as the
+ * least significant byte.
+ */
+static void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
+{
+    const UINT8 *next = bytes;
+    unsigned int shift = 0;
+
+    *word = 0;
+    while (shift < 64) {
+        *word |= (UINT64)(*next++) << shift;
+        shift += 8;
+    }
+}
+#endif
+
+
+#ifdef ProvideFast576
+static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
+#else
+ UINT64 dataAsWords[9];
+ unsigned int i;
+
+ for(i=0; i<9; i++)
+ fromBytesToWord(dataAsWords+i, data+(i*8));
+ KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, dataAsWords);
+#endif
+}
+#endif
+
+#ifdef ProvideFast832
+static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
+#else
+ UINT64 dataAsWords[13];
+ unsigned int i;
+
+ for(i=0; i<13; i++)
+ fromBytesToWord(dataAsWords+i, data+(i*8));
+ KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, dataAsWords);
+#endif
+}
+#endif
+
+#ifdef ProvideFast1024
+static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
+#else
+ UINT64 dataAsWords[16];
+ unsigned int i;
+
+ for(i=0; i<16; i++)
+ fromBytesToWord(dataAsWords+i, data+(i*8));
+ KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, dataAsWords);
+#endif
+}
+#endif
+
+#ifdef ProvideFast1088
+static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
+#else
+ UINT64 dataAsWords[17];
+ unsigned int i;
+
+ for(i=0; i<17; i++)
+ fromBytesToWord(dataAsWords+i, data+(i*8));
+ KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, dataAsWords);
+#endif
+}
+#endif
+
+#ifdef ProvideFast1152
+static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
+#else
+ UINT64 dataAsWords[18];
+ unsigned int i;
+
+ for(i=0; i<18; i++)
+ fromBytesToWord(dataAsWords+i, data+(i*8));
+ KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, dataAsWords);
+#endif
+}
+#endif
+
+#ifdef ProvideFast1344
+static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
+#else
+ UINT64 dataAsWords[21];
+ unsigned int i;
+
+ for(i=0; i<21; i++)
+ fromBytesToWord(dataAsWords+i, data+(i*8));
+ KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, dataAsWords);
+#endif
+}
+#endif
+/*
+ * Absorb entry point for an arbitrary lane count.
+ *
+ * Little-endian: data is reinterpreted as const UINT64* and passed on
+ * directly.
+ * NOTE(review): that cast presumably relies on data being suitably aligned
+ * for 64-bit loads -- confirm against the callers.
+ * Otherwise: each 8-byte group of data is converted with fromBytesToWord()
+ * into dataAsWords first.  dataAsWords has 25 entries, so laneCount must
+ * not exceed 25 on this path.
+ */
+static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
+#else
+ UINT64 dataAsWords[25];
+ unsigned int i;
+
+ for(i=0; i<laneCount; i++)
+ fromBytesToWord(dataAsWords+i, data+(i*8));
+ KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount);
+#endif
+}
+
+#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
+/*
+ * Store `word` into bytes[0..7], least significant byte first.
+ */
+static void fromWordToBytes(UINT8 *bytes, const UINT64 word)
+{
+    UINT64 remaining = word;
+    UINT8 *out = bytes;
+    UINT8 *const end = bytes + (64/8);
+
+    while (out != end) {
+        *out++ = (UINT8)(remaining & 0xFF);
+        remaining >>= 8;
+    }
+}
+#endif
+
+/*
+ * Fixed-size extract: write the first 16 lanes (128 bytes) of state to
+ * data.  Little-endian hosts copy the bytes directly; otherwise each
+ * 64-bit word is serialized with fromWordToBytes().
+ * With UseBebigokimisa, the output words 1, 2, 8 and 12 are complemented
+ * afterwards.
+ */
+#ifdef ProvideFast1024
+static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ memcpy(data, state, 128);
+#else
+ unsigned int i;
+
+ for(i=0; i<16; i++)
+ fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
+#endif
+#ifdef UseBebigokimisa
+ ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1];
+ ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2];
+ ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8];
+ ((UINT64*)data)[12] = ~((UINT64*)data)[12];
+#endif
+}
+#endif
+/*
+ * Write the first laneCount lanes (laneCount*8 bytes) of state to data.
+ * Little-endian hosts copy the bytes directly; otherwise each 64-bit word
+ * is serialized with fromWordToBytes().
+ *
+ * With UseBebigokimisa, the output words 1, 2, 8, 12, 17 and 20 are
+ * complemented afterwards.  Each one is touched only when laneCount is
+ * large enough for that word to have been written; the checks are nested
+ * because the thresholds increase monotonically.
+ */
+static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ memcpy(data, state, laneCount*8);
+#else
+ unsigned int i;
+
+ for(i=0; i<laneCount; i++)
+ fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
+#endif
+#ifdef UseBebigokimisa
+ if (laneCount > 1) {
+ ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1];
+ if (laneCount > 2) {
+ ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2];
+ if (laneCount > 8) {
+ ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8];
+ if (laneCount > 12) {
+ ((UINT64*)data)[12] = ~((UINT64*)data)[12];
+ if (laneCount > 17) {
+ ((UINT64*)data)[17] = ~((UINT64*)data)[17];
+ if (laneCount > 20) {
+ ((UINT64*)data)[20] = ~((UINT64*)data)[20];
+ }
+ }
+ }
+ }
+ }
+ }
+#endif
+}
diff --git a/Modules/_sha3/keccak/KeccakF-1600-simd128.macros b/Modules/_sha3/keccak/KeccakF-1600-simd128.macros
new file mode 100644
index 0000000000..98e47f5a59
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-simd128.macros
@@ -0,0 +1,651 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/*
+ * Declares every local variable the macros in this file refer to.
+ * - A... and E... names: the two state prefixes the round macro pastes
+ *   with ##; single lanes are V64, the packed pairs bage, begi, bigo,
+ *   bogu, buga, kame, kemi, kimo, komu and kuma are V6464.
+ * - The ...ae and ...io pairs filled by the copyFromState macros are
+ *   declared for the A prefix only.
+ * - B... names: intermediate values of the round (V64 lanes, V128 pairs).
+ * - C... and D... names: values of the theta step.
+ * - Zero: V128 scratch variable, assigned from ZERO128() by the round macro.
+ */
+#define declareABCDE \
+ V6464 Abage, Abegi, Abigo, Abogu, Abuga; \
+ V6464 Akame, Akemi, Akimo, Akomu, Akuma; \
+ V6464 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio, Asae, Asio; \
+ V64 Aba, Abe, Abi, Abo, Abu; \
+ V64 Aga, Age, Agi, Ago, Agu; \
+ V64 Aka, Ake, Aki, Ako, Aku; \
+ V64 Ama, Ame, Ami, Amo, Amu; \
+ V64 Asa, Ase, Asi, Aso, Asu; \
+ V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
+ V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
+ V64 Bba, Bbe, Bbi, Bbo, Bbu; \
+ V64 Bga, Bge, Bgi, Bgo, Bgu; \
+ V64 Bka, Bke, Bki, Bko, Bku; \
+ V64 Bma, Bme, Bmi, Bmo, Bmu; \
+ V64 Bsa, Bse, Bsi, Bso, Bsu; \
+ V128 Cae, Cei, Cio, Cou, Cua, Dei, Dou; \
+ V64 Ca, Ce, Ci, Co, Cu; \
+ V64 Da, De, Di, Do, Du; \
+ V6464 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
+ V6464 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
+ V64 Eba, Ebe, Ebi, Ebo, Ebu; \
+ V64 Ega, Ege, Egi, Ego, Egu; \
+ V64 Eka, Eke, Eki, Eko, Eku; \
+ V64 Ema, Eme, Emi, Emo, Emu; \
+ V64 Esa, Ese, Esi, Eso, Esu; \
+ V128 Zero;
+
+/* Expands to nothing in this variant: the copyFromState macros and the
+ * round macro in this file already leave the column sums in Cae, Cio and
+ * Cu, which is what computeD reads. */
+#define prepareTheta
+
+/*
+ * First part of the theta step: derive Da, De, Di, Do and Du from the
+ * column sums held in Cae, Cio and Cu.
+ * Dei and Dou are computed as 128-bit values; De and Do are assigned from
+ * them directly, Di and Du through COPY64HI2LO. Cua is rebuilt here from
+ * Cu and Cae before it is used.
+ * NOTE(review): presumably Dei and Dou hold (De, Di) and (Do, Du) in their
+ * low/high 64-bit halves -- confirm against the definitions of GET64LOLO,
+ * ROL64in128 and COPY64HI2LO.
+ */
+#define computeD \
+ Cua = GET64LOLO(Cu, Cae); \
+ Dei = XOR128(Cae, ROL64in128(Cio, 1)); \
+ Dou = XOR128(Cio, ROL64in128(Cua, 1)); \
+ Da = XOR64(Cu, ROL64in128(COPY64HI2LO(Cae), 1)); \
+ De = Dei; \
+ Di = COPY64HI2LO(Dei); \
+ Do = Dou; \
+ Du = COPY64HI2LO(Dou);
+
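+/* --- Theta Rho Pi Chi Iota Prepare-theta: one round reading state A and */
+/* --- writing state E, which also accumulates Cae, Cio and Cu from E for */
+/* --- the computeD of the following round */
+/* --- 64-bit lanes mapped to 64-bit and 128-bit words */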
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ computeD \
+ \
+ A##ba = LOAD64(A##bage.v64[0]); \
+ XOReq64(A##ba, Da); \
+ Bba = A##ba; \
+ XOReq64(A##gu, Du); \
+ Bge = ROL64(A##gu, 20); \
+ Bbage = GET64LOLO(Bba, Bge); \
+ A##ge = LOAD64(A##bage.v64[1]); \
+ XOReq64(A##ge, De); \
+ Bbe = ROL64(A##ge, 44); \
+ A##ka = LOAD64(A##kame.v64[0]); \
+ XOReq64(A##ka, Da); \
+ Bgi = ROL64(A##ka, 3); \
+ Bbegi = GET64LOLO(Bbe, Bgi); \
+ XOReq64(A##ki, Di); \
+ Bbi = ROL64(A##ki, 43); \
+ A##me = LOAD64(A##kame.v64[1]); \
+ XOReq64(A##me, De); \
+ Bgo = ROL64(A##me, 45); \
+ Bbigo = GET64LOLO(Bbi, Bgo); \
+ E##bage.v128 = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
+ XOReq128(E##bage.v128, CONST64(KeccakF1600RoundConstants[i])); \
+ Cae = E##bage.v128; \
+ XOReq64(A##mo, Do); \
+ Bbo = ROL64(A##mo, 21); \
+ XOReq64(A##si, Di); \
+ Bgu = ROL64(A##si, 61); \
+ Bbogu = GET64LOLO(Bbo, Bgu); \
+ E##begi.v128 = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
+ Cei = E##begi.v128; \
+ XOReq64(A##su, Du); \
+ Bbu = ROL64(A##su, 14); \
+ XOReq64(A##bo, Do); \
+ Bga = ROL64(A##bo, 28); \
+ Bbuga = GET64LOLO(Bbu, Bga); \
+ E##bigo.v128 = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
+ E##bi = E##bigo.v128; \
+ E##go = GET64HIHI(E##bigo.v128, E##bigo.v128); \
+ Cio = E##bigo.v128; \
+ E##bogu.v128 = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
+ E##bo = E##bogu.v128; \
+ E##gu = GET64HIHI(E##bogu.v128, E##bogu.v128); \
+ Cou = E##bogu.v128; \
+ E##buga.v128 = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
+ E##bu = E##buga.v128; \
+ E##ga = GET64HIHI(E##buga.v128, E##buga.v128); \
+ Cua = E##buga.v128; \
+\
+ A##be = LOAD64(A##begi.v64[0]); \
+ XOReq64(A##be, De); \
+ Bka = ROL64(A##be, 1); \
+ XOReq64(A##ga, Da); \
+ Bme = ROL64(A##ga, 36); \
+ Bkame = GET64LOLO(Bka, Bme); \
+ A##gi = LOAD64(A##begi.v64[1]); \
+ XOReq64(A##gi, Di); \
+ Bke = ROL64(A##gi, 6); \
+ A##ke = LOAD64(A##kemi.v64[0]); \
+ XOReq64(A##ke, De); \
+ Bmi = ROL64(A##ke, 10); \
+ Bkemi = GET64LOLO(Bke, Bmi); \
+ XOReq64(A##ko, Do); \
+ Bki = ROL64(A##ko, 25); \
+ A##mi = LOAD64(A##kemi.v64[1]); \
+ XOReq64(A##mi, Di); \
+ Bmo = ROL64(A##mi, 15); \
+ Bkimo = GET64LOLO(Bki, Bmo); \
+ E##kame.v128 = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
+ XOReq128(Cae, E##kame.v128); \
+ Bkomu = GET64LOLO(XOR64(A##mu, Du), XOR64(A##so, Do)); \
+ Bkomu = SHUFFLEBYTES128(Bkomu, CONST128(rho8_56)); \
+ E##kemi.v128 = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
+ XOReq128(Cei, E##kemi.v128); \
+ XOReq64(A##sa, Da); \
+ Bku = ROL64(A##sa, 18); \
+ XOReq64(A##bu, Du); \
+ Bma = ROL64(A##bu, 27); \
+ Bkuma = GET64LOLO(Bku, Bma); \
+ E##kimo.v128 = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
+ E##ki = E##kimo.v128; \
+ E##mo = GET64HIHI(E##kimo.v128, E##kimo.v128); \
+ XOReq128(Cio, E##kimo.v128); \
+ E##komu.v128 = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
+ E##ko = E##komu.v128; \
+ E##mu = GET64HIHI(E##komu.v128, E##komu.v128); \
+ XOReq128(Cou, E##komu.v128); \
+ E##kuma.v128 = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
+ E##ku = E##kuma.v128; \
+ E##ma = GET64HIHI(E##kuma.v128, E##kuma.v128); \
+ XOReq128(Cua, E##kuma.v128); \
+\
+ XOReq64(A##bi, Di); \
+ Bsa = ROL64(A##bi, 62); \
+ XOReq64(A##go, Do); \
+ Bse = ROL64(A##go, 55); \
+ XOReq64(A##ku, Du); \
+ Bsi = ROL64(A##ku, 39); \
+ E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
+ Ca = E##sa; \
+ XOReq64(A##ma, Da); \
+ Bso = ROL64(A##ma, 41); \
+ E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
+ Ce = E##se; \
+ XOReq128(Cae, GET64LOLO(Ca, Ce)); \
+ XOReq64(A##se, De); \
+ Bsu = ROL64(A##se, 2); \
+ E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
+ Ci = E##si; \
+ E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
+ Co = E##so; \
+ XOReq128(Cio, GET64LOLO(Ci, Co)); \
+ E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
+ Cu = E##su; \
+\
+ Zero = ZERO128(); \
+ XOReq128(Cae, GET64HIHI(Cua, Zero)); \
+ XOReq128(Cae, GET64LOLO(Zero, Cei)); \
+ XOReq128(Cio, GET64HIHI(Cei, Zero)); \
+ XOReq128(Cio, GET64LOLO(Zero, Cou)); \
+ XOReq128(Cua, GET64HIHI(Cou, Zero)); \
+ XOReq64(Cu, Cua); \
+
+/* --- Theta Rho Pi Chi Iota */
+/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
+/* This file has no separate round without prepare-theta: the plain round
+ * simply expands to thetaRhoPiChiIotaPrepareTheta with the same arguments. */
+#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
+
+/*
+ * The 24 round constants, indexed by the round number i that is passed to
+ * the round macro, which XORs entry i into E##bage via CONST64.
+ */
+static const UINT64 KeccakF1600RoundConstants[24] = {
+ 0x0000000000000001ULL,
+ 0x0000000000008082ULL,
+ 0x800000000000808aULL,
+ 0x8000000080008000ULL,
+ 0x000000000000808bULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008009ULL,
+ 0x000000000000008aULL,
+ 0x0000000000000088ULL,
+ 0x0000000080008009ULL,
+ 0x000000008000000aULL,
+ 0x000000008000808bULL,
+ 0x800000000000008bULL,
+ 0x8000000000008089ULL,
+ 0x8000000000008003ULL,
+ 0x8000000000008002ULL,
+ 0x8000000000000080ULL,
+ 0x000000000000800aULL,
+ 0x800000008000000aULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008080ULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008008ULL };
+
+#define copyFromStateAndXor576bits(X, state, input) \
+ X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae.v128; \
+ X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
+ Cae = X##bae.v128; \
+ X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio.v128; \
+ X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
+ Cio = X##bio.v128; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cu = X##bu; \
+ X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae.v128; \
+ X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
+ X##bage.v128 = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae.v128); \
+ X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio.v128; \
+ X##begi.v128 = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
+ XOReq128(Cio, X##gio.v128); \
+ X##gu = LOAD64(state[ 9]); \
+ XOReq64(Cu, X##gu); \
+ X##kae.v128 = LOAD128(state[10]); \
+ X##ka = X##kae.v128; \
+ X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
+ XOReq128(Cae, X##kae.v128); \
+ X##kio.v128 = LOAD128(state[12]); \
+ X##ki = X##kio.v128; \
+ X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
+ XOReq128(Cio, X##kio.v128); \
+ X##ku = LOAD64(state[14]); \
+ XOReq64(Cu, X##ku); \
+ X##mae.v128 = LOAD128u(state[15]); \
+ X##ma = X##mae.v128; \
+ X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
+ X##kame.v128 = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, X##mae.v128); \
+ X##mio.v128 = LOAD128u(state[17]); \
+ X##mi = X##mio.v128; \
+ X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
+ XOReq128(Cio, X##mio.v128); \
+ X##mu = LOAD64(state[19]); \
+ XOReq64(Cu, X##mu); \
+ X##sae.v128 = LOAD128(state[20]); \
+ X##sa = X##sae.v128; \
+ X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
+ XOReq128(Cae, X##sae.v128); \
+ X##sio.v128 = LOAD128(state[22]); \
+ X##si = X##sio.v128; \
+ X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
+ XOReq128(Cio, X##sio.v128); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cu, X##su); \
+
+#define copyFromStateAndXor832bits(X, state, input) \
+ X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae.v128; \
+ X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
+ Cae = X##bae.v128; \
+ X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio.v128; \
+ X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
+ Cio = X##bio.v128; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cu = X##bu; \
+ X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae.v128; \
+ X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
+ X##bage.v128 = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae.v128); \
+ X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio.v128; \
+ X##begi.v128 = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
+ XOReq128(Cio, X##gio.v128); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ XOReq64(Cu, X##gu); \
+ X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae.v128; \
+ X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
+ XOReq128(Cae, X##kae.v128); \
+ X##kio.v128 = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
+ X##ki = X##kio.v128; \
+ X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
+ XOReq128(Cio, X##kio.v128); \
+ X##ku = LOAD64(state[14]); \
+ XOReq64(Cu, X##ku); \
+ X##mae.v128 = LOAD128u(state[15]); \
+ X##ma = X##mae.v128; \
+ X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
+ X##kame.v128 = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, X##mae.v128); \
+ X##mio.v128 = LOAD128u(state[17]); \
+ X##mi = X##mio.v128; \
+ X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
+ XOReq128(Cio, X##mio.v128); \
+ X##mu = LOAD64(state[19]); \
+ XOReq64(Cu, X##mu); \
+ X##sae.v128 = LOAD128(state[20]); \
+ X##sa = X##sae.v128; \
+ X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
+ XOReq128(Cae, X##sae.v128); \
+ X##sio.v128 = LOAD128(state[22]); \
+ X##si = X##sio.v128; \
+ X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
+ XOReq128(Cio, X##sio.v128); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cu, X##su); \
+
+#define copyFromStateAndXor1024bits(X, state, input) \
+ X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae.v128; \
+ X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
+ Cae = X##bae.v128; \
+ X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio.v128; \
+ X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
+ Cio = X##bio.v128; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cu = X##bu; \
+ X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae.v128; \
+ X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
+ X##bage.v128 = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae.v128); \
+ X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio.v128; \
+ X##begi.v128 = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
+ XOReq128(Cio, X##gio.v128); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ XOReq64(Cu, X##gu); \
+ X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae.v128; \
+ X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
+ XOReq128(Cae, X##kae.v128); \
+ X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio.v128; \
+ X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
+ XOReq128(Cio, X##kio.v128); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ XOReq64(Cu, X##ku); \
+ X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD64(input[15])); \
+ X##ma = X##mae.v128; \
+ X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
+ X##kame.v128 = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, X##mae.v128); \
+ X##mio.v128 = LOAD128u(state[17]); \
+ X##mi = X##mio.v128; \
+ X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
+ XOReq128(Cio, X##mio.v128); \
+ X##mu = LOAD64(state[19]); \
+ XOReq64(Cu, X##mu); \
+ X##sae.v128 = LOAD128(state[20]); \
+ X##sa = X##sae.v128; \
+ X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
+ XOReq128(Cae, X##sae.v128); \
+ X##sio.v128 = LOAD128(state[22]); \
+ X##si = X##sio.v128; \
+ X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
+ XOReq128(Cio, X##sio.v128); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cu, X##su); \
+
+#define copyFromStateAndXor1088bits(X, state, input) \
+ X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae.v128; \
+ X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
+ Cae = X##bae.v128; \
+ X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio.v128; \
+ X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
+ Cio = X##bio.v128; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cu = X##bu; \
+ X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae.v128; \
+ X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
+ X##bage.v128 = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae.v128); \
+ X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio.v128; \
+ X##begi.v128 = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
+ XOReq128(Cio, X##gio.v128); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ XOReq64(Cu, X##gu); \
+ X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae.v128; \
+ X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
+ XOReq128(Cae, X##kae.v128); \
+ X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio.v128; \
+ X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
+ XOReq128(Cio, X##kio.v128); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ XOReq64(Cu, X##ku); \
+ X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
+ X##ma = X##mae.v128; \
+ X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
+ X##kame.v128 = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, X##mae.v128); \
+ X##mio.v128 = LOAD128u(state[17]); \
+ X##mi = X##mio.v128; \
+ X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
+ XOReq128(Cio, X##mio.v128); \
+ X##mu = LOAD64(state[19]); \
+ XOReq64(Cu, X##mu); \
+ X##sae.v128 = LOAD128(state[20]); \
+ X##sa = X##sae.v128; \
+ X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
+ XOReq128(Cae, X##sae.v128); \
+ X##sio.v128 = LOAD128(state[22]); \
+ X##si = X##sio.v128; \
+ X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
+ XOReq128(Cio, X##sio.v128); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cu, X##su); \
+
+#define copyFromStateAndXor1152bits(X, state, input) \
+ X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae.v128; \
+ X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
+ Cae = X##bae.v128; \
+ X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio.v128; \
+ X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
+ Cio = X##bio.v128; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cu = X##bu; \
+ X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae.v128; \
+ X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
+ X##bage.v128 = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae.v128); \
+ X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio.v128; \
+ X##begi.v128 = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
+ XOReq128(Cio, X##gio.v128); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ XOReq64(Cu, X##gu); \
+ X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae.v128; \
+ X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
+ XOReq128(Cae, X##kae.v128); \
+ X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio.v128; \
+ X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
+ XOReq128(Cio, X##kio.v128); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ XOReq64(Cu, X##ku); \
+ X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
+ X##ma = X##mae.v128; \
+ X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
+ X##kame.v128 = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, X##mae.v128); \
+ X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
+ X##mi = X##mio.v128; \
+ X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
+ XOReq128(Cio, X##mio.v128); \
+ X##mu = LOAD64(state[19]); \
+ XOReq64(Cu, X##mu); \
+ X##sae.v128 = LOAD128(state[20]); \
+ X##sa = X##sae.v128; \
+ X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
+ XOReq128(Cae, X##sae.v128); \
+ X##sio.v128 = LOAD128(state[22]); \
+ X##si = X##sio.v128; \
+ X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
+ XOReq128(Cio, X##sio.v128); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cu, X##su); \
+
+#define copyFromStateAndXor1344bits(X, state, input) \
+ X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae.v128; \
+ X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
+ Cae = X##bae.v128; \
+ X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio.v128; \
+ X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
+ Cio = X##bio.v128; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cu = X##bu; \
+ X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae.v128; \
+ X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
+ X##bage.v128 = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae.v128); \
+ X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio.v128; \
+ X##begi.v128 = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
+ XOReq128(Cio, X##gio.v128); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ XOReq64(Cu, X##gu); \
+ X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae.v128; \
+ X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
+ XOReq128(Cae, X##kae.v128); \
+ X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio.v128; \
+ X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
+ XOReq128(Cio, X##kio.v128); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ XOReq64(Cu, X##ku); \
+ X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
+ X##ma = X##mae.v128; \
+ X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
+ X##kame.v128 = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, X##mae.v128); \
+ X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
+ X##mi = X##mio.v128; \
+ X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
+ XOReq128(Cio, X##mio.v128); \
+ X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
+ XOReq64(Cu, X##mu); \
+ X##sae.v128 = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
+ X##sa = X##sae.v128; \
+ X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
+ XOReq128(Cae, X##sae.v128); \
+ X##sio.v128 = LOAD128(state[22]); \
+ X##si = X##sio.v128; \
+ X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
+ XOReq128(Cio, X##sio.v128); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cu, X##su); \
+
+#define copyFromState(X, state) \
+ X##bae.v128 = LOAD128(state[ 0]); \
+ X##ba = X##bae.v128; \
+ X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
+ Cae = X##bae.v128; \
+ X##bio.v128 = LOAD128(state[ 2]); \
+ X##bi = X##bio.v128; \
+ X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
+ Cio = X##bio.v128; \
+ X##bu = LOAD64(state[ 4]); \
+ Cu = X##bu; \
+ X##gae.v128 = LOAD128u(state[ 5]); \
+ X##ga = X##gae.v128; \
+ X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
+ X##bage.v128 = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae.v128); \
+ X##gio.v128 = LOAD128u(state[ 7]); \
+ X##gi = X##gio.v128; \
+ X##begi.v128 = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
+ XOReq128(Cio, X##gio.v128); \
+ X##gu = LOAD64(state[ 9]); \
+ XOReq64(Cu, X##gu); \
+ X##kae.v128 = LOAD128(state[10]); \
+ X##ka = X##kae.v128; \
+ X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
+ XOReq128(Cae, X##kae.v128); \
+ X##kio.v128 = LOAD128(state[12]); \
+ X##ki = X##kio.v128; \
+ X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
+ XOReq128(Cio, X##kio.v128); \
+ X##ku = LOAD64(state[14]); \
+ XOReq64(Cu, X##ku); \
+ X##mae.v128 = LOAD128u(state[15]); \
+ X##ma = X##mae.v128; \
+ X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
+ X##kame.v128 = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, X##mae.v128); \
+ X##mio.v128 = LOAD128u(state[17]); \
+ X##mi = X##mio.v128; \
+ X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
+ XOReq128(Cio, X##mio.v128); \
+ X##mu = LOAD64(state[19]); \
+ XOReq64(Cu, X##mu); \
+ X##sae.v128 = LOAD128(state[20]); \
+ X##sa = X##sae.v128; \
+ X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
+ XOReq128(Cae, X##sae.v128); \
+ X##sio.v128 = LOAD128(state[22]); \
+ X##si = X##sio.v128; \
+ X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
+ XOReq128(Cio, X##sio.v128); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cu, X##su); \
+
+/*
+ * Write the 25 lanes of the working state with prefix X to state[0..24].
+ *
+ * state: array of 25 lanes; single lanes are written with STORE64, the
+ * halves of packed pairs by plain assignment.
+ * X: prefix of the state variables to store.
+ *
+ * Lanes 0/6, 1/7, 10/16 and 11/17 come from the two halves of the packed
+ * pairs X##bage, X##begi, X##kame and X##kemi; all other lanes come from
+ * their individual X##.. variables. Every variable uses the X prefix, so
+ * the macro stores the state it is asked to store rather than always
+ * reading Abage and Abegi.
+ */
+#define copyToState(state, X) \
+ state[ 0] = X##bage.v64[0]; \
+ state[ 1] = X##begi.v64[0]; \
+ STORE64(state[ 2], X##bi); \
+ STORE64(state[ 3], X##bo); \
+ STORE64(state[ 4], X##bu); \
+ STORE64(state[ 5], X##ga); \
+ state[ 6] = X##bage.v64[1]; \
+ state[ 7] = X##begi.v64[1]; \
+ STORE64(state[ 8], X##go); \
+ STORE64(state[ 9], X##gu); \
+ state[10] = X##kame.v64[0]; \
+ state[11] = X##kemi.v64[0]; \
+ STORE64(state[12], X##ki); \
+ STORE64(state[13], X##ko); \
+ STORE64(state[14], X##ku); \
+ STORE64(state[15], X##ma); \
+ state[16] = X##kame.v64[1]; \
+ state[17] = X##kemi.v64[1]; \
+ STORE64(state[18], X##mo); \
+ STORE64(state[19], X##mu); \
+ STORE64(state[20], X##sa); \
+ STORE64(state[21], X##se); \
+ STORE64(state[22], X##si); \
+ STORE64(state[23], X##so); \
+ STORE64(state[24], X##su); \
+
+#define copyStateVariables(X, Y) \
+ X##bage = Y##bage; \
+ X##begi = Y##begi; \
+ X##bi = Y##bi; \
+ X##bo = Y##bo; \
+ X##bu = Y##bu; \
+ X##ga = Y##ga; \
+ X##go = Y##go; \
+ X##gu = Y##gu; \
+ X##kame = Y##kame; \
+ X##kemi = Y##kemi; \
+ X##ki = Y##ki; \
+ X##ko = Y##ko; \
+ X##ku = Y##ku; \
+ X##ma = Y##ma; \
+ X##mo = Y##mo; \
+ X##mu = Y##mu; \
+ X##sa = Y##sa; \
+ X##se = Y##se; \
+ X##si = Y##si; \
+ X##so = Y##so; \
+ X##su = Y##su; \
+
diff --git a/Modules/_sha3/keccak/KeccakF-1600-simd64.macros b/Modules/_sha3/keccak/KeccakF-1600-simd64.macros
new file mode 100644
index 0000000000..06a30e2ae0
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-simd64.macros
@@ -0,0 +1,517 @@
+/*
+Code automatically generated by KeccakTools!
+
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/*
+ * Declares every local variable the macros in this file refer to, all of
+ * type V64: the 25 lanes for each of the two state prefixes A and E that
+ * the round macros paste with ##, the 25 intermediate B lanes, and the
+ * five C and five D values of the theta step.
+ */
+#define declareABCDE \
+ V64 Aba, Abe, Abi, Abo, Abu; \
+ V64 Aga, Age, Agi, Ago, Agu; \
+ V64 Aka, Ake, Aki, Ako, Aku; \
+ V64 Ama, Ame, Ami, Amo, Amu; \
+ V64 Asa, Ase, Asi, Aso, Asu; \
+ V64 Bba, Bbe, Bbi, Bbo, Bbu; \
+ V64 Bga, Bge, Bgi, Bgo, Bgu; \
+ V64 Bka, Bke, Bki, Bko, Bku; \
+ V64 Bma, Bme, Bmi, Bmo, Bmu; \
+ V64 Bsa, Bse, Bsi, Bso, Bsu; \
+ V64 Ca, Ce, Ci, Co, Cu; \
+ V64 Da, De, Di, Do, Du; \
+ V64 Eba, Ebe, Ebi, Ebo, Ebu; \
+ V64 Ega, Ege, Egi, Ego, Egu; \
+ V64 Eka, Eke, Eki, Eko, Eku; \
+ V64 Ema, Eme, Emi, Emo, Emu; \
+ V64 Esa, Ese, Esi, Eso, Esu; \
+
+/*
+ * Compute the five column sums Ca..Cu that the round macros read: each is
+ * the XOR64 combination of the five lanes of one column. The macro takes
+ * no argument and always reads the A-prefixed state variables.
+ */
+#define prepareTheta \
+ Ca = XOR64(Aba, XOR64(Aga, XOR64(Aka, XOR64(Ama, Asa)))); \
+ Ce = XOR64(Abe, XOR64(Age, XOR64(Ake, XOR64(Ame, Ase)))); \
+ Ci = XOR64(Abi, XOR64(Agi, XOR64(Aki, XOR64(Ami, Asi)))); \
+ Co = XOR64(Abo, XOR64(Ago, XOR64(Ako, XOR64(Amo, Aso)))); \
+ Cu = XOR64(Abu, XOR64(Agu, XOR64(Aku, XOR64(Amu, Asu)))); \
+
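+/* --- Code for round, with prepare-theta: computes state E from state A */
+/* --- and accumulates the column sums Ca..Cu of E for the following round */
+/* --- 64-bit lanes mapped to 64-bit words */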
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da = XOR64(Cu, ROL64(Ce, 1)); \
+ De = XOR64(Ca, ROL64(Ci, 1)); \
+ Di = XOR64(Ce, ROL64(Co, 1)); \
+ Do = XOR64(Ci, ROL64(Cu, 1)); \
+ Du = XOR64(Co, ROL64(Ca, 1)); \
+\
+ XOReq64(A##ba, Da); \
+ Bba = A##ba; \
+ XOReq64(A##ge, De); \
+ Bbe = ROL64(A##ge, 44); \
+ XOReq64(A##ki, Di); \
+ Bbi = ROL64(A##ki, 43); \
+ E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
+ XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
+ Ca = E##ba; \
+ XOReq64(A##mo, Do); \
+ Bbo = ROL64(A##mo, 21); \
+ E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
+ Ce = E##be; \
+ XOReq64(A##su, Du); \
+ Bbu = ROL64(A##su, 14); \
+ E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
+ Ci = E##bi; \
+ E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
+ Co = E##bo; \
+ E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
+ Cu = E##bu; \
+\
+ XOReq64(A##bo, Do); \
+ Bga = ROL64(A##bo, 28); \
+ XOReq64(A##gu, Du); \
+ Bge = ROL64(A##gu, 20); \
+ XOReq64(A##ka, Da); \
+ Bgi = ROL64(A##ka, 3); \
+ E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
+ XOReq64(Ca, E##ga); \
+ XOReq64(A##me, De); \
+ Bgo = ROL64(A##me, 45); \
+ E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
+ XOReq64(Ce, E##ge); \
+ XOReq64(A##si, Di); \
+ Bgu = ROL64(A##si, 61); \
+ E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
+ XOReq64(Ci, E##gi); \
+ E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
+ XOReq64(Co, E##go); \
+ E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
+ XOReq64(Cu, E##gu); \
+\
+ XOReq64(A##be, De); \
+ Bka = ROL64(A##be, 1); \
+ XOReq64(A##gi, Di); \
+ Bke = ROL64(A##gi, 6); \
+ XOReq64(A##ko, Do); \
+ Bki = ROL64(A##ko, 25); \
+ E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
+ XOReq64(Ca, E##ka); \
+ XOReq64(A##mu, Du); \
+ Bko = ROL64(A##mu, 8); \
+ E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
+ XOReq64(Ce, E##ke); \
+ XOReq64(A##sa, Da); \
+ Bku = ROL64(A##sa, 18); \
+ E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
+ XOReq64(Ci, E##ki); \
+ E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
+ XOReq64(Co, E##ko); \
+ E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
+ XOReq64(Cu, E##ku); \
+\
+ XOReq64(A##bu, Du); \
+ Bma = ROL64(A##bu, 27); \
+ XOReq64(A##ga, Da); \
+ Bme = ROL64(A##ga, 36); \
+ XOReq64(A##ke, De); \
+ Bmi = ROL64(A##ke, 10); \
+ E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
+ XOReq64(Ca, E##ma); \
+ XOReq64(A##mi, Di); \
+ Bmo = ROL64(A##mi, 15); \
+ E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
+ XOReq64(Ce, E##me); \
+ XOReq64(A##so, Do); \
+ Bmu = ROL64(A##so, 56); \
+ E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
+ XOReq64(Ci, E##mi); \
+ E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
+ XOReq64(Co, E##mo); \
+ E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
+ XOReq64(Cu, E##mu); \
+\
+ XOReq64(A##bi, Di); \
+ Bsa = ROL64(A##bi, 62); \
+ XOReq64(A##go, Do); \
+ Bse = ROL64(A##go, 55); \
+ XOReq64(A##ku, Du); \
+ Bsi = ROL64(A##ku, 39); \
+ E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
+ XOReq64(Ca, E##sa); \
+ XOReq64(A##ma, Da); \
+ Bso = ROL64(A##ma, 41); \
+ E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
+ XOReq64(Ce, E##se); \
+ XOReq64(A##se, De); \
+ Bsu = ROL64(A##se, 2); \
+ E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
+ XOReq64(Ci, E##si); \
+ E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
+ XOReq64(Co, E##so); \
+ E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
+ XOReq64(Cu, E##su); \
+\
+
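+/* --- Code for round, without prepare-theta: computes state E from state A; */
+/* --- Ca..Cu are only read here and are not refreshed from E */
+/* --- 64-bit lanes mapped to 64-bit words */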
+#define thetaRhoPiChiIota(i, A, E) \
+ Da = XOR64(Cu, ROL64(Ce, 1)); \
+ De = XOR64(Ca, ROL64(Ci, 1)); \
+ Di = XOR64(Ce, ROL64(Co, 1)); \
+ Do = XOR64(Ci, ROL64(Cu, 1)); \
+ Du = XOR64(Co, ROL64(Ca, 1)); \
+\
+ XOReq64(A##ba, Da); \
+ Bba = A##ba; \
+ XOReq64(A##ge, De); \
+ Bbe = ROL64(A##ge, 44); \
+ XOReq64(A##ki, Di); \
+ Bbi = ROL64(A##ki, 43); \
+ E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
+ XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
+ XOReq64(A##mo, Do); \
+ Bbo = ROL64(A##mo, 21); \
+ E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
+ XOReq64(A##su, Du); \
+ Bbu = ROL64(A##su, 14); \
+ E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
+ E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
+ E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
+\
+ XOReq64(A##bo, Do); \
+ Bga = ROL64(A##bo, 28); \
+ XOReq64(A##gu, Du); \
+ Bge = ROL64(A##gu, 20); \
+ XOReq64(A##ka, Da); \
+ Bgi = ROL64(A##ka, 3); \
+ E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
+ XOReq64(A##me, De); \
+ Bgo = ROL64(A##me, 45); \
+ E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
+ XOReq64(A##si, Di); \
+ Bgu = ROL64(A##si, 61); \
+ E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
+ E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
+ E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
+\
+ XOReq64(A##be, De); \
+ Bka = ROL64(A##be, 1); \
+ XOReq64(A##gi, Di); \
+ Bke = ROL64(A##gi, 6); \
+ XOReq64(A##ko, Do); \
+ Bki = ROL64(A##ko, 25); \
+ E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
+ XOReq64(A##mu, Du); \
+ Bko = ROL64(A##mu, 8); \
+ E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
+ XOReq64(A##sa, Da); \
+ Bku = ROL64(A##sa, 18); \
+ E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
+ E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
+ E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
+\
+ XOReq64(A##bu, Du); \
+ Bma = ROL64(A##bu, 27); \
+ XOReq64(A##ga, Da); \
+ Bme = ROL64(A##ga, 36); \
+ XOReq64(A##ke, De); \
+ Bmi = ROL64(A##ke, 10); \
+ E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
+ XOReq64(A##mi, Di); \
+ Bmo = ROL64(A##mi, 15); \
+ E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
+ XOReq64(A##so, Do); \
+ Bmu = ROL64(A##so, 56); \
+ E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
+ E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
+ E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
+\
+ XOReq64(A##bi, Di); \
+ Bsa = ROL64(A##bi, 62); \
+ XOReq64(A##go, Do); \
+ Bse = ROL64(A##go, 55); \
+ XOReq64(A##ku, Du); \
+ Bsi = ROL64(A##ku, 39); \
+ E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
+ XOReq64(A##ma, Da); \
+ Bso = ROL64(A##ma, 41); \
+ E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
+ XOReq64(A##se, De); \
+ Bsu = ROL64(A##se, 2); \
+ E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
+ E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
+ E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
+\
+
+/*
+ * The 24 round constants, indexed by the round number i that is passed to
+ * the round macros, which XOR entry i into lane E##ba.
+ */
+static const UINT64 KeccakF1600RoundConstants[24] = {
+ 0x0000000000000001ULL,
+ 0x0000000000008082ULL,
+ 0x800000000000808aULL,
+ 0x8000000080008000ULL,
+ 0x000000000000808bULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008009ULL,
+ 0x000000000000008aULL,
+ 0x0000000000000088ULL,
+ 0x0000000080008009ULL,
+ 0x000000008000000aULL,
+ 0x000000008000808bULL,
+ 0x800000000000008bULL,
+ 0x8000000000008089ULL,
+ 0x8000000000008003ULL,
+ 0x8000000000008002ULL,
+ 0x8000000000000080ULL,
+ 0x000000000000800aULL,
+ 0x800000008000000aULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008080ULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008008ULL };
+
+/*
+ * Load all 25 lanes of state[] into the variables with prefix X, XORing
+ * the first 9 lanes (576 bits, indices 0..8) with the matching lanes of
+ * input[]. Lanes 9..24 are loaded from state[] unchanged.
+ */
+#define copyFromStateAndXor576bits(X, state, input) \
+ X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
+ X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
+ X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
+ X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
+ X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
+ X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
+ X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
+ X##gu = LOAD64(state[ 9]); \
+ X##ka = LOAD64(state[10]); \
+ X##ke = LOAD64(state[11]); \
+ X##ki = LOAD64(state[12]); \
+ X##ko = LOAD64(state[13]); \
+ X##ku = LOAD64(state[14]); \
+ X##ma = LOAD64(state[15]); \
+ X##me = LOAD64(state[16]); \
+ X##mi = LOAD64(state[17]); \
+ X##mo = LOAD64(state[18]); \
+ X##mu = LOAD64(state[19]); \
+ X##sa = LOAD64(state[20]); \
+ X##se = LOAD64(state[21]); \
+ X##si = LOAD64(state[22]); \
+ X##so = LOAD64(state[23]); \
+ X##su = LOAD64(state[24]); \
+
+/* Load the 25 lanes state[0..24] into the variables X##ba ... X##su.
+ * The first 13 lanes (13 x 64 = 832 bits) are XORed with input[0..12] while
+ * being loaded; lanes 13..24 are loaded unchanged.
+ *   X      prefix of the 25 lane variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes, read through LOAD64
+ *   input  array of at least 13 lanes, read through LOAD64 */
+#define copyFromStateAndXor832bits(X, state, input) \
+ X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
+ X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
+ X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
+ X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
+ X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
+ X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
+ X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
+ X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
+ X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
+ X##ko = LOAD64(state[13]); \
+ X##ku = LOAD64(state[14]); \
+ X##ma = LOAD64(state[15]); \
+ X##me = LOAD64(state[16]); \
+ X##mi = LOAD64(state[17]); \
+ X##mo = LOAD64(state[18]); \
+ X##mu = LOAD64(state[19]); \
+ X##sa = LOAD64(state[20]); \
+ X##se = LOAD64(state[21]); \
+ X##si = LOAD64(state[22]); \
+ X##so = LOAD64(state[23]); \
+ X##su = LOAD64(state[24]); \
+
+/* Load the 25 lanes state[0..24] into the variables X##ba ... X##su.
+ * The first 16 lanes (16 x 64 = 1024 bits) are XORed with input[0..15]
+ * while being loaded; lanes 16..24 are loaded unchanged.
+ *   X      prefix of the 25 lane variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes, read through LOAD64
+ *   input  array of at least 16 lanes, read through LOAD64 */
+#define copyFromStateAndXor1024bits(X, state, input) \
+ X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
+ X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
+ X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
+ X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
+ X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
+ X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
+ X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
+ X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
+ X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
+ X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
+ X##me = LOAD64(state[16]); \
+ X##mi = LOAD64(state[17]); \
+ X##mo = LOAD64(state[18]); \
+ X##mu = LOAD64(state[19]); \
+ X##sa = LOAD64(state[20]); \
+ X##se = LOAD64(state[21]); \
+ X##si = LOAD64(state[22]); \
+ X##so = LOAD64(state[23]); \
+ X##su = LOAD64(state[24]); \
+
+/* Load the 25 lanes state[0..24] into the variables X##ba ... X##su.
+ * The first 17 lanes (17 x 64 = 1088 bits) are XORed with input[0..16]
+ * while being loaded; lanes 17..24 are loaded unchanged.
+ *   X      prefix of the 25 lane variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes, read through LOAD64
+ *   input  array of at least 17 lanes, read through LOAD64 */
+#define copyFromStateAndXor1088bits(X, state, input) \
+ X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
+ X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
+ X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
+ X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
+ X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
+ X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
+ X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
+ X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
+ X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
+ X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
+ X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
+ X##mi = LOAD64(state[17]); \
+ X##mo = LOAD64(state[18]); \
+ X##mu = LOAD64(state[19]); \
+ X##sa = LOAD64(state[20]); \
+ X##se = LOAD64(state[21]); \
+ X##si = LOAD64(state[22]); \
+ X##so = LOAD64(state[23]); \
+ X##su = LOAD64(state[24]); \
+
+/* Load the 25 lanes state[0..24] into the variables X##ba ... X##su.
+ * The first 18 lanes (18 x 64 = 1152 bits) are XORed with input[0..17]
+ * while being loaded; lanes 18..24 are loaded unchanged.
+ *   X      prefix of the 25 lane variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes, read through LOAD64
+ *   input  array of at least 18 lanes, read through LOAD64 */
+#define copyFromStateAndXor1152bits(X, state, input) \
+ X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
+ X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
+ X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
+ X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
+ X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
+ X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
+ X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
+ X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
+ X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
+ X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
+ X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
+ X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
+ X##mo = LOAD64(state[18]); \
+ X##mu = LOAD64(state[19]); \
+ X##sa = LOAD64(state[20]); \
+ X##se = LOAD64(state[21]); \
+ X##si = LOAD64(state[22]); \
+ X##so = LOAD64(state[23]); \
+ X##su = LOAD64(state[24]); \
+
+/* Load the 25 lanes state[0..24] into the variables X##ba ... X##su.
+ * The first 21 lanes (21 x 64 = 1344 bits) are XORed with input[0..20]
+ * while being loaded; lanes 21..24 are loaded unchanged.
+ *   X      prefix of the 25 lane variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes, read through LOAD64
+ *   input  array of at least 21 lanes, read through LOAD64 */
+#define copyFromStateAndXor1344bits(X, state, input) \
+ X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
+ X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
+ X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
+ X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
+ X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
+ X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
+ X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
+ X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
+ X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
+ X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
+ X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
+ X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
+ X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
+ X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
+ X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \
+ X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
+ X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \
+ X##se = LOAD64(state[21]); \
+ X##si = LOAD64(state[22]); \
+ X##so = LOAD64(state[23]); \
+ X##su = LOAD64(state[24]); \
+
+/* Load the 25 lanes state[0..24] into the variables X##ba ... X##su without
+ * mixing in any input.
+ *   X      prefix of the 25 lane variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes, read through LOAD64 */
+#define copyFromState(X, state) \
+ X##ba = LOAD64(state[ 0]); \
+ X##be = LOAD64(state[ 1]); \
+ X##bi = LOAD64(state[ 2]); \
+ X##bo = LOAD64(state[ 3]); \
+ X##bu = LOAD64(state[ 4]); \
+ X##ga = LOAD64(state[ 5]); \
+ X##ge = LOAD64(state[ 6]); \
+ X##gi = LOAD64(state[ 7]); \
+ X##go = LOAD64(state[ 8]); \
+ X##gu = LOAD64(state[ 9]); \
+ X##ka = LOAD64(state[10]); \
+ X##ke = LOAD64(state[11]); \
+ X##ki = LOAD64(state[12]); \
+ X##ko = LOAD64(state[13]); \
+ X##ku = LOAD64(state[14]); \
+ X##ma = LOAD64(state[15]); \
+ X##me = LOAD64(state[16]); \
+ X##mi = LOAD64(state[17]); \
+ X##mo = LOAD64(state[18]); \
+ X##mu = LOAD64(state[19]); \
+ X##sa = LOAD64(state[20]); \
+ X##se = LOAD64(state[21]); \
+ X##si = LOAD64(state[22]); \
+ X##so = LOAD64(state[23]); \
+ X##su = LOAD64(state[24]); \
+
+/* Inverse of copyFromState: write the 25 lane variables X##ba ... X##su
+ * back to state[0..24] through STORE64, in the same lane order.
+ *   state  array of at least 25 lanes to overwrite
+ *   X      prefix of the 25 lane variables to read (pasted with ##) */
+#define copyToState(state, X) \
+ STORE64(state[ 0], X##ba); \
+ STORE64(state[ 1], X##be); \
+ STORE64(state[ 2], X##bi); \
+ STORE64(state[ 3], X##bo); \
+ STORE64(state[ 4], X##bu); \
+ STORE64(state[ 5], X##ga); \
+ STORE64(state[ 6], X##ge); \
+ STORE64(state[ 7], X##gi); \
+ STORE64(state[ 8], X##go); \
+ STORE64(state[ 9], X##gu); \
+ STORE64(state[10], X##ka); \
+ STORE64(state[11], X##ke); \
+ STORE64(state[12], X##ki); \
+ STORE64(state[13], X##ko); \
+ STORE64(state[14], X##ku); \
+ STORE64(state[15], X##ma); \
+ STORE64(state[16], X##me); \
+ STORE64(state[17], X##mi); \
+ STORE64(state[18], X##mo); \
+ STORE64(state[19], X##mu); \
+ STORE64(state[20], X##sa); \
+ STORE64(state[21], X##se); \
+ STORE64(state[22], X##si); \
+ STORE64(state[23], X##so); \
+ STORE64(state[24], X##su); \
+
+/* Copy the 25 lane variables with prefix Y into the 25 lane variables with
+ * prefix X (X##ba = Y##ba, ..., X##su = Y##su).
+ *   X  prefix of the destination variables (pasted with ##)
+ *   Y  prefix of the source variables (pasted with ##) */
+#define copyStateVariables(X, Y) \
+ X##ba = Y##ba; \
+ X##be = Y##be; \
+ X##bi = Y##bi; \
+ X##bo = Y##bo; \
+ X##bu = Y##bu; \
+ X##ga = Y##ga; \
+ X##ge = Y##ge; \
+ X##gi = Y##gi; \
+ X##go = Y##go; \
+ X##gu = Y##gu; \
+ X##ka = Y##ka; \
+ X##ke = Y##ke; \
+ X##ki = Y##ki; \
+ X##ko = Y##ko; \
+ X##ku = Y##ku; \
+ X##ma = Y##ma; \
+ X##me = Y##me; \
+ X##mi = Y##mi; \
+ X##mo = Y##mo; \
+ X##mu = Y##mu; \
+ X##sa = Y##sa; \
+ X##se = Y##se; \
+ X##si = Y##si; \
+ X##so = Y##so; \
+ X##su = Y##su; \
+
diff --git a/Modules/_sha3/keccak/KeccakF-1600-unrolling.macros b/Modules/_sha3/keccak/KeccakF-1600-unrolling.macros
new file mode 100644
index 0000000000..83c694ca48
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-unrolling.macros
@@ -0,0 +1,124 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/* Define `rounds`, the statement sequence that applies all 24 rounds and
+ * writes the result back with copyToState(state, A).
+ *
+ * The variant is chosen at compile time by the value of Unrolling, which
+ * must be one of 1, 2, 3, 4, 6, 8, 12 or 24; any other value stops the
+ * build with #error.  Unrolling is the number of round-macro invocations
+ * written out per loop iteration (24 means no loop at all).
+ *
+ * Every variant starts with prepareTheta and then alternates the round
+ * macro between (A -> E) and (E -> A).  When Unrolling is odd (1 or 3) the
+ * last round of the loop body leaves its result in E, so
+ * copyStateVariables(A, E) moves it back into A before the next iteration.
+ *
+ * The looping variants use a counter named i and all variants use `state`;
+ * neither is declared here, so the code expanding `rounds` must provide
+ * them, together with the round macros and the A/E variables. */
+#if (Unrolling == 24)
+#define rounds \
+ prepareTheta \
+ thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(10, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(11, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(12, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(13, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(14, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(15, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(16, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(17, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(18, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(19, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(20, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(21, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(22, A, E) \
+ thetaRhoPiChiIota(23, E, A) \
+ copyToState(state, A)
+#elif (Unrolling == 12)
+#define rounds \
+ prepareTheta \
+ for(i=0; i<24; i+=12) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
+ } \
+ copyToState(state, A)
+#elif (Unrolling == 8)
+#define rounds \
+ prepareTheta \
+ for(i=0; i<24; i+=8) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+6, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+7, E, A) \
+ } \
+ copyToState(state, A)
+#elif (Unrolling == 6)
+#define rounds \
+ prepareTheta \
+ for(i=0; i<24; i+=6) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
+ } \
+ copyToState(state, A)
+#elif (Unrolling == 4)
+#define rounds \
+ prepareTheta \
+ for(i=0; i<24; i+=4) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
+ } \
+ copyToState(state, A)
+#elif (Unrolling == 3)
+#define rounds \
+ prepareTheta \
+ for(i=0; i<24; i+=3) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ copyStateVariables(A, E) \
+ } \
+ copyToState(state, A)
+#elif (Unrolling == 2)
+#define rounds \
+ prepareTheta \
+ for(i=0; i<24; i+=2) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ } \
+ copyToState(state, A)
+#elif (Unrolling == 1)
+#define rounds \
+ prepareTheta \
+ for(i=0; i<24; i++) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ copyStateVariables(A, E) \
+ } \
+ copyToState(state, A)
+#else
+#error "Unrolling is not correctly specified!"
+#endif
diff --git a/Modules/_sha3/keccak/KeccakF-1600-xop.macros b/Modules/_sha3/keccak/KeccakF-1600-xop.macros
new file mode 100644
index 0000000000..823c946fff
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakF-1600-xop.macros
@@ -0,0 +1,573 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/* Declare every local variable the macros of this file operate on:
+ *   A...  the state, both as paired lanes in V128 (e.g. Abage) and as
+ *         single lanes in V64 (e.g. Aba), as filled in by copyFromState*;
+ *   B...  per-round temporaries of thetaRhoPiChiIotaPrepareTheta;
+ *   C...  accumulators XORed together while the state is loaded/produced;
+ *   D...  values derived from C by computeD;
+ *   E...  the second state set the round macro writes to;
+ *   Zero  a vector that the round macro sets to ZERO128().
+ * Nothing is initialised here.  V128 and V64 are defined elsewhere. */
+#define declareABCDE \
+ V128 Abage, Abegi, Abigo, Abogu, Abuga; \
+ V128 Akame, Akemi, Akimo, Akomu, Akuma; \
+ V128 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio; \
+ V64 Aba, Abe, Abi, Abo, Abu; \
+ V64 Aga, Age, Agi, Ago, Agu; \
+ V64 Aka, Ake, Aki, Ako, Aku; \
+ V64 Ama, Ame, Ami, Amo, Amu; \
+ V128 Asase, Asiso; \
+ V64 Asu; \
+ V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
+ V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
+ V128 Bsase, Bsesi, Bsiso, Bsosu, Bsusa; \
+ V128 Cae, Cei, Cio, Cou, Cua; \
+ V128 Dau, Dea, Die, Doi, Duo; \
+ V128 Dua, Dae, Dei, Dio, Dou; \
+ V128 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
+ V128 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
+ V128 Esase, Esiso; \
+ V64 Esu; \
+ V128 Zero;
+
+/* Expands to nothing in this implementation.  The accumulators Cae, Cio and
+ * Cua that computeD consumes are instead filled in by the copyFromState*
+ * macros of this file while they load the state. */
+#define prepareTheta
+
+/* Derive the D vectors used by thetaRhoPiChiIotaPrepareTheta from the
+ * accumulators Cae, Cio and Cua.
+ * Reads:   Cae, Cio, Cua.
+ * Writes:  Cua and Cei (both overwritten as scratch), Dei, Dou, Dae, and
+ *          the re-paired forms Dau, Dea, Die, Doi, Duo built from them with
+ *          SWAP64 / GET64LOHI / GET64LOLO.
+ * Statement order matters: Cua is rebuilt before Dou and Dae read it, and
+ * Die is assigned before Doi reads it. */
+#define computeD \
+ Cua = GET64LOLO(Cua, Cae); \
+ Dei = XOR128(Cae, ROL6464same(Cio, 1)); \
+ Dou = XOR128(Cio, ROL6464same(Cua, 1)); \
+ Cei = GET64HILO(Cae, Cio); \
+ Dae = XOR128(Cua, ROL6464same(Cei, 1)); \
+ Dau = GET64LOHI(Dae, Dou); \
+ Dea = SWAP64(Dae); \
+ Die = SWAP64(Dei); \
+ Doi = GET64LOLO(Dou, Die); \
+ Duo = SWAP64(Dou);
+
+/* --- Theta Rho Pi Chi Iota Prepare-theta */
+/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
+/* One round: reads the paired-lane state A##bage ... A##su, writes the next
+ * state to E##bage ... E##su, and leaves Cae, Cio and Cua ready for the
+ * computeD of the following round.
+ *   i  round index, used to pick KeccakF1600RoundConstants[i]
+ *   A  prefix of the input state variables (pasted with ##)
+ *   E  prefix of the output state variables (pasted with ##)
+ * Each B vector is built by pairing two lanes of A, XORing in the matching
+ * D vector and rotating the two halves by their own amounts (ROL6464); each
+ * E vector is then B ^ (~B' & B'') via ANDnu128, and is folded into the C
+ * accumulators as soon as it is produced.
+ * Zero is assigned at the very top because the upper half of E##su is taken
+ * from it: it must already be all-zero the first time the round runs,
+ * otherwise that half is XORed into Cua as an indeterminate value. */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Zero = ZERO128(); /* must precede the E##su line below */ \
+ computeD \
+ \
+ Bbage = XOR128(GET64LOHI(A##bage, A##bogu), Dau); \
+ Bbage = ROL6464(Bbage, 0, 20); \
+ Bbegi = XOR128(GET64HILO(A##bage, A##kame), Dea); \
+ Bbegi = ROL6464(Bbegi, 44, 3); \
+ Bbigo = XOR128(GET64LOHI(A##kimo, A##kame), Die); \
+ Bbigo = ROL6464(Bbigo, 43, 45); \
+ E##bage = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
+ XOReq128(E##bage, CONST64(KeccakF1600RoundConstants[i])); \
+ Cae = E##bage; \
+ Bbogu = XOR128(GET64HILO(A##kimo, A##siso), Doi); \
+ Bbogu = ROL6464(Bbogu, 21, 61); \
+ E##begi = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
+ Cei = E##begi; \
+ Bbuga = XOR128(GET64LOLO(A##su, A##bogu), Duo); \
+ Bbuga = ROL6464(Bbuga, 14, 28); \
+ E##bigo = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
+ Cio = E##bigo; \
+ E##bogu = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
+ Cou = E##bogu; \
+ E##buga = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
+ Cua = E##buga; \
+\
+ Bkame = XOR128(GET64LOHI(A##begi, A##buga), Dea); \
+ Bkame = ROL6464(Bkame, 1, 36); \
+ Bkemi = XOR128(GET64HILO(A##begi, A##kemi), Die); \
+ Bkemi = ROL6464(Bkemi, 6, 10); \
+ Bkimo = XOR128(GET64LOHI(A##komu, A##kemi), Doi); \
+ Bkimo = ROL6464(Bkimo, 25, 15); \
+ E##kame = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
+ XOReq128(Cae, E##kame); \
+ Bkomu = XOR128(GET64HIHI(A##komu, A##siso), Duo); \
+ Bkomu = ROL6464(Bkomu, 8, 56); \
+ E##kemi = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
+ XOReq128(Cei, E##kemi); \
+ Bkuma = XOR128(GET64LOLO(A##sase, A##buga), Dau); \
+ Bkuma = ROL6464(Bkuma, 18, 27); \
+ E##kimo = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
+ XOReq128(Cio, E##kimo); \
+ E##komu = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
+ XOReq128(Cou, E##komu); \
+ E##kuma = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
+ XOReq128(Cua, E##kuma); \
+\
+ Bsase = XOR128(A##bigo, SWAP64(Doi)); \
+ Bsase = ROL6464(Bsase, 62, 55); \
+ Bsiso = XOR128(A##kuma, SWAP64(Dau)); \
+ Bsiso = ROL6464(Bsiso, 39, 41); \
+ Bsusa = XOR64(COPY64HI2LO(A##sase), Dei); \
+ Bsusa = ROL6464same(Bsusa, 2); \
+ Bsusa = GET64LOLO(Bsusa, Bsase); \
+ Bsesi = GET64HILO(Bsase, Bsiso); \
+ Bsosu = GET64HILO(Bsiso, Bsusa); \
+ E##sase = XOR128(Bsase, ANDnu128(Bsesi, Bsiso)); \
+ XOReq128(Cae, E##sase); \
+ E##siso = XOR128(Bsiso, ANDnu128(Bsosu, Bsusa)); \
+ XOReq128(Cio, E##siso); \
+ E##su = GET64LOLO(XOR128(Bsusa, ANDnu128(Bsase, Bsesi)), Zero); \
+ XOReq128(Cua, E##su); \
+\
+ XOReq128(Cae, GET64HIHI(Cua, Zero)); \
+ XOReq128(Cae, GET64LOLO(Zero, Cei)); \
+ XOReq128(Cio, GET64HIHI(Cei, Zero)); \
+ XOReq128(Cio, GET64LOLO(Zero, Cou)); \
+ XOReq128(Cua, GET64HIHI(Cou, Zero)); \
+
+/* --- Theta Rho Pi Chi Iota */
+/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
+/* Plain alias: this implementation has no separate "last round" variant, so
+ * the final round also runs the prepare-theta form and updates the C
+ * accumulators even though no further round consumes them. */
+#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
+
+/* Table of the 24 64-bit round constants of Keccak-f[1600].  Entry i is
+ * XORed into E##bage (through CONST64) by
+ * thetaRhoPiChiIotaPrepareTheta(i, A, E). */
+static const UINT64 KeccakF1600RoundConstants[24] = {
+ 0x0000000000000001ULL,
+ 0x0000000000008082ULL,
+ 0x800000000000808aULL,
+ 0x8000000080008000ULL,
+ 0x000000000000808bULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008009ULL,
+ 0x000000000000008aULL,
+ 0x0000000000000088ULL,
+ 0x0000000080008009ULL,
+ 0x000000008000000aULL,
+ 0x000000008000808bULL,
+ 0x800000000000008bULL,
+ 0x8000000000008089ULL,
+ 0x8000000000008003ULL,
+ 0x8000000000008002ULL,
+ 0x8000000000000080ULL,
+ 0x000000000000800aULL,
+ 0x800000008000000aULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008080ULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008008ULL };
+
+/* Load state[0..24] into the paired-lane variables X##bage ... X##su, XOR
+ * input[0..8] (9 lanes = 576 bits) into lanes 0..8 on the way, and build
+ * the accumulators Cae, Cio and Cua that computeD consumes.
+ *   X      prefix of the variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes
+ *   input  array of at least 9 lanes; only ever read with LOAD128u/LOAD64
+ * Lanes are loaded two at a time where possible (X##bae, X##bio, ...),
+ * split into single lanes with GET64HIHI, then re-paired with GET64LOLO
+ * into the layout the round macro expects (X##bage, X##begi, ...).
+ * Statement order matters: each re-pairing uses lanes loaded just before
+ * it, and the C accumulators are updated incrementally. */
+#define copyFromStateAndXor576bits(X, state, input) \
+ X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae; \
+ X##be = GET64HIHI(X##bae, X##bae); \
+ Cae = X##bae; \
+ X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio; \
+ X##bo = GET64HIHI(X##bio, X##bio); \
+ Cio = X##bio; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cua = X##bu; \
+ X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae; \
+ X##buga = GET64LOLO(X##bu, X##ga); \
+ X##ge = GET64HIHI(X##gae, X##gae); \
+ X##bage = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae); \
+ X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio; \
+ X##begi = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio, X##gio); \
+ X##bigo = GET64LOLO(X##bi, X##go); \
+ XOReq128(Cio, X##gio); \
+ X##gu = LOAD64(state[ 9]); \
+ X##bogu = GET64LOLO(X##bo, X##gu); \
+ XOReq64(Cua, X##gu); \
+ X##kae = LOAD128(state[10]); \
+ X##ka = X##kae; \
+ X##ke = GET64HIHI(X##kae, X##kae); \
+ XOReq128(Cae, X##kae); \
+ X##kio = LOAD128(state[12]); \
+ X##ki = X##kio; \
+ X##ko = GET64HIHI(X##kio, X##kio); \
+ XOReq128(Cio, X##kio); \
+ X##kuma = LOAD128(state[14]); \
+ XOReq64(Cua, X##kuma); \
+ X##me = LOAD64(state[16]); \
+ X##kame = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
+ X##mio = LOAD128u(state[17]); \
+ X##mi = X##mio; \
+ X##kemi = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio, X##mio); \
+ X##kimo = GET64LOLO(X##ki, X##mo); \
+ XOReq128(Cio, X##mio); \
+ X##mu = LOAD64(state[19]); \
+ X##komu = GET64LOLO(X##ko, X##mu); \
+ XOReq64(Cua, X##mu); \
+ X##sase = LOAD128(state[20]); \
+ XOReq128(Cae, X##sase); \
+ X##siso = LOAD128(state[22]); \
+ XOReq128(Cio, X##siso); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cua, X##su); \
+
+/* Load state[0..24] into the paired-lane variables X##bage ... X##su, XOR
+ * input[0..12] (13 lanes = 832 bits) into lanes 0..12 on the way, and build
+ * the accumulators Cae, Cio and Cua that computeD consumes.
+ *   X      prefix of the variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes
+ *   input  array of at least 13 lanes; only ever read with LOAD128u/LOAD64
+ * input[12] is the last input lane, so it is read with LOAD64 and never as
+ * a 128-bit pair: a pair load would touch input[13], which is not part of
+ * the input.
+ * Statement order matters: each re-pairing uses lanes loaded just before
+ * it, and the C accumulators are updated incrementally. */
+#define copyFromStateAndXor832bits(X, state, input) \
+ X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae; \
+ X##be = GET64HIHI(X##bae, X##bae); \
+ Cae = X##bae; \
+ X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio; \
+ X##bo = GET64HIHI(X##bio, X##bio); \
+ Cio = X##bio; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cua = X##bu; \
+ X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae; \
+ X##buga = GET64LOLO(X##bu, X##ga); \
+ X##ge = GET64HIHI(X##gae, X##gae); \
+ X##bage = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae); \
+ X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio; \
+ X##begi = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio, X##gio); \
+ X##bigo = GET64LOLO(X##bi, X##go); \
+ XOReq128(Cio, X##gio); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##bogu = GET64LOLO(X##bo, X##gu); \
+ XOReq64(Cua, X##gu); \
+ X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae; \
+ X##ke = GET64HIHI(X##kae, X##kae); \
+ XOReq128(Cae, X##kae); \
+ X##kio = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
+ X##ki = X##kio; \
+ X##ko = GET64HIHI(X##kio, X##kio); \
+ XOReq128(Cio, X##kio); \
+ X##kuma = LOAD128(state[14]); \
+ XOReq64(Cua, X##kuma); \
+ X##me = LOAD64(state[16]); \
+ X##kame = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
+ X##mio = LOAD128u(state[17]); \
+ X##mi = X##mio; \
+ X##kemi = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio, X##mio); \
+ X##kimo = GET64LOLO(X##ki, X##mo); \
+ XOReq128(Cio, X##mio); \
+ X##mu = LOAD64(state[19]); \
+ X##komu = GET64LOLO(X##ko, X##mu); \
+ XOReq64(Cua, X##mu); \
+ X##sase = LOAD128(state[20]); \
+ XOReq128(Cae, X##sase); \
+ X##siso = LOAD128(state[22]); \
+ XOReq128(Cio, X##siso); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cua, X##su); \
+
+#define copyFromStateAndXor1024bits(X, state, input) \
+ X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae; \
+ X##be = GET64HIHI(X##bae, X##bae); \
+ Cae = X##bae; \
+ X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio; \
+ X##bo = GET64HIHI(X##bio, X##bio); \
+ Cio = X##bio; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cua = X##bu; \
+ X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae; \
+ X##buga = GET64LOLO(X##bu, X##ga); \
+ X##ge = GET64HIHI(X##gae, X##gae); \
+ X##bage = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae); \
+ X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio; \
+ X##begi = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio, X##gio); \
+ X##bigo = GET64LOLO(X##bi, X##go); \
+ XOReq128(Cio, X##gio); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##bogu = GET64LOLO(X##bo, X##gu); \
+ XOReq64(Cua, X##gu); \
+ X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae; \
+ X##ke = GET64HIHI(X##kae, X##kae); \
+ XOReq128(Cae, X##kae); \
+ X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio; \
+ X##ko = GET64HIHI(X##kio, X##kio); \
+ XOReq128(Cio, X##kio); \
+ X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); \
+ XOReq64(Cua, X##kuma); \
+ X##me = LOAD64(state[16]); \
+ X##kame = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
+ X##mio = LOAD128u(state[17]); \
+ X##mi = X##mio; \
+ X##kemi = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio, X##mio); \
+ X##kimo = GET64LOLO(X##ki, X##mo); \
+ XOReq128(Cio, X##mio); \
+ X##mu = LOAD64(state[19]); \
+ X##komu = GET64LOLO(X##ko, X##mu); \
+ XOReq64(Cua, X##mu); \
+ X##sase = LOAD128(state[20]); \
+ XOReq128(Cae, X##sase); \
+ X##siso = LOAD128(state[22]); \
+ XOReq128(Cio, X##siso); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cua, X##su); \
+
+#define copyFromStateAndXor1088bits(X, state, input) \
+ X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae; \
+ X##be = GET64HIHI(X##bae, X##bae); \
+ Cae = X##bae; \
+ X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio; \
+ X##bo = GET64HIHI(X##bio, X##bio); \
+ Cio = X##bio; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cua = X##bu; \
+ X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae; \
+ X##buga = GET64LOLO(X##bu, X##ga); \
+ X##ge = GET64HIHI(X##gae, X##gae); \
+ X##bage = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae); \
+ X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio; \
+ X##begi = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio, X##gio); \
+ X##bigo = GET64LOLO(X##bi, X##go); \
+ XOReq128(Cio, X##gio); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##bogu = GET64LOLO(X##bo, X##gu); \
+ XOReq64(Cua, X##gu); \
+ X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae; \
+ X##ke = GET64HIHI(X##kae, X##kae); \
+ XOReq128(Cae, X##kae); \
+ X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio; \
+ X##ko = GET64HIHI(X##kio, X##kio); \
+ XOReq128(Cio, X##kio); \
+ X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); \
+ XOReq64(Cua, X##kuma); \
+ X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
+ X##kame = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
+ X##mio = LOAD128u(state[17]); \
+ X##mi = X##mio; \
+ X##kemi = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio, X##mio); \
+ X##kimo = GET64LOLO(X##ki, X##mo); \
+ XOReq128(Cio, X##mio); \
+ X##mu = LOAD64(state[19]); \
+ X##komu = GET64LOLO(X##ko, X##mu); \
+ XOReq64(Cua, X##mu); \
+ X##sase = LOAD128(state[20]); \
+ XOReq128(Cae, X##sase); \
+ X##siso = LOAD128(state[22]); \
+ XOReq128(Cio, X##siso); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cua, X##su); \
+
+#define copyFromStateAndXor1152bits(X, state, input) \
+ X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae; \
+ X##be = GET64HIHI(X##bae, X##bae); \
+ Cae = X##bae; \
+ X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio; \
+ X##bo = GET64HIHI(X##bio, X##bio); \
+ Cio = X##bio; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cua = X##bu; \
+ X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae; \
+ X##buga = GET64LOLO(X##bu, X##ga); \
+ X##ge = GET64HIHI(X##gae, X##gae); \
+ X##bage = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae); \
+ X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio; \
+ X##begi = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio, X##gio); \
+ X##bigo = GET64LOLO(X##bi, X##go); \
+ XOReq128(Cio, X##gio); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##bogu = GET64LOLO(X##bo, X##gu); \
+ XOReq64(Cua, X##gu); \
+ X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae; \
+ X##ke = GET64HIHI(X##kae, X##kae); \
+ XOReq128(Cae, X##kae); \
+ X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio; \
+ X##ko = GET64HIHI(X##kio, X##kio); \
+ XOReq128(Cio, X##kio); \
+ X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); \
+ XOReq64(Cua, X##kuma); \
+ X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
+ X##kame = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
+ X##mio = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
+ X##mi = X##mio; \
+ X##kemi = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio, X##mio); \
+ X##kimo = GET64LOLO(X##ki, X##mo); \
+ XOReq128(Cio, X##mio); \
+ X##mu = LOAD64(state[19]); \
+ X##komu = GET64LOLO(X##ko, X##mu); \
+ XOReq64(Cua, X##mu); \
+ X##sase = LOAD128(state[20]); \
+ XOReq128(Cae, X##sase); \
+ X##siso = LOAD128(state[22]); \
+ XOReq128(Cio, X##siso); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cua, X##su); \
+
+#define copyFromStateAndXor1344bits(X, state, input) \
+ X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
+ X##ba = X##bae; \
+ X##be = GET64HIHI(X##bae, X##bae); \
+ Cae = X##bae; \
+ X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
+ X##bi = X##bio; \
+ X##bo = GET64HIHI(X##bio, X##bio); \
+ Cio = X##bio; \
+ X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
+ Cua = X##bu; \
+ X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
+ X##ga = X##gae; \
+ X##buga = GET64LOLO(X##bu, X##ga); \
+ X##ge = GET64HIHI(X##gae, X##gae); \
+ X##bage = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae); \
+ X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
+ X##gi = X##gio; \
+ X##begi = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio, X##gio); \
+ X##bigo = GET64LOLO(X##bi, X##go); \
+ XOReq128(Cio, X##gio); \
+ X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
+ X##bogu = GET64LOLO(X##bo, X##gu); \
+ XOReq64(Cua, X##gu); \
+ X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
+ X##ka = X##kae; \
+ X##ke = GET64HIHI(X##kae, X##kae); \
+ XOReq128(Cae, X##kae); \
+ X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
+ X##ki = X##kio; \
+ X##ko = GET64HIHI(X##kio, X##kio); \
+ XOReq128(Cio, X##kio); \
+ X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); \
+ XOReq64(Cua, X##kuma); \
+ X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
+ X##kame = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
+ X##mio = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
+ X##mi = X##mio; \
+ X##kemi = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio, X##mio); \
+ X##kimo = GET64LOLO(X##ki, X##mo); \
+ XOReq128(Cio, X##mio); \
+ X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
+ X##komu = GET64LOLO(X##ko, X##mu); \
+ XOReq64(Cua, X##mu); \
+ X##sase = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
+ XOReq128(Cae, X##sase); \
+ X##siso = LOAD128(state[22]); \
+ XOReq128(Cio, X##siso); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cua, X##su); \
+
+/* Load state[0..24] into the paired-lane variables X##bage ... X##su with
+ * no input mixed in, and build the accumulators Cae, Cio and Cua that
+ * computeD consumes.
+ *   X      prefix of the variables to assign (pasted with ##)
+ *   state  array of at least 25 lanes
+ * Lanes are loaded two at a time where possible (X##bae, X##bio, ...),
+ * split into single lanes with GET64HIHI, then re-paired with GET64LOLO
+ * into the layout the round macro expects (X##bage, X##begi, ...).
+ * Pairs starting at an odd lane index (5, 7, 17) are read with LOAD128u,
+ * the others with LOAD128.
+ * Statement order matters: each re-pairing uses lanes loaded just before
+ * it, and the C accumulators are updated incrementally. */
+#define copyFromState(X, state) \
+ X##bae = LOAD128(state[ 0]); \
+ X##ba = X##bae; \
+ X##be = GET64HIHI(X##bae, X##bae); \
+ Cae = X##bae; \
+ X##bio = LOAD128(state[ 2]); \
+ X##bi = X##bio; \
+ X##bo = GET64HIHI(X##bio, X##bio); \
+ Cio = X##bio; \
+ X##bu = LOAD64(state[ 4]); \
+ Cua = X##bu; \
+ X##gae = LOAD128u(state[ 5]); \
+ X##ga = X##gae; \
+ X##buga = GET64LOLO(X##bu, X##ga); \
+ X##ge = GET64HIHI(X##gae, X##gae); \
+ X##bage = GET64LOLO(X##ba, X##ge); \
+ XOReq128(Cae, X##gae); \
+ X##gio = LOAD128u(state[ 7]); \
+ X##gi = X##gio; \
+ X##begi = GET64LOLO(X##be, X##gi); \
+ X##go = GET64HIHI(X##gio, X##gio); \
+ X##bigo = GET64LOLO(X##bi, X##go); \
+ XOReq128(Cio, X##gio); \
+ X##gu = LOAD64(state[ 9]); \
+ X##bogu = GET64LOLO(X##bo, X##gu); \
+ XOReq64(Cua, X##gu); \
+ X##kae = LOAD128(state[10]); \
+ X##ka = X##kae; \
+ X##ke = GET64HIHI(X##kae, X##kae); \
+ XOReq128(Cae, X##kae); \
+ X##kio = LOAD128(state[12]); \
+ X##ki = X##kio; \
+ X##ko = GET64HIHI(X##kio, X##kio); \
+ XOReq128(Cio, X##kio); \
+ X##kuma = LOAD128(state[14]); \
+ XOReq64(Cua, X##kuma); \
+ X##me = LOAD64(state[16]); \
+ X##kame = GET64LOLO(X##ka, X##me); \
+ XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
+ X##mio = LOAD128u(state[17]); \
+ X##mi = X##mio; \
+ X##kemi = GET64LOLO(X##ke, X##mi); \
+ X##mo = GET64HIHI(X##mio, X##mio); \
+ X##kimo = GET64LOLO(X##ki, X##mo); \
+ XOReq128(Cio, X##mio); \
+ X##mu = LOAD64(state[19]); \
+ X##komu = GET64LOLO(X##ko, X##mu); \
+ XOReq64(Cua, X##mu); \
+ X##sase = LOAD128(state[20]); \
+ XOReq128(Cae, X##sase); \
+ X##siso = LOAD128(state[22]); \
+ XOReq128(Cio, X##siso); \
+ X##su = LOAD64(state[24]); \
+ XOReq64(Cua, X##su); \
+
+/* Write the paired-lane variables X##bage ... X##su back to state[0..24].
+ *   state  array of at least 25 lanes to overwrite
+ *   X      prefix of the variables to read (pasted with ##)
+ * For the pairs whose two lanes are not adjacent in state (X##bage,
+ * X##begi, ...), one half is written by a plain STORE64 and the other half
+ * is first moved down with COPY64HI2LO and stored five lanes further on.
+ * The pairs that are adjacent in state (X##buga, X##kuma, X##sase, X##siso)
+ * are written with a single STORE128. */
+#define copyToState(state, X) \
+ STORE64(state[ 0], X##bage); \
+ STORE64(state[ 1], X##begi); \
+ STORE64(state[ 2], X##bigo); \
+ STORE64(state[ 3], X##bogu); \
+ STORE128(state[ 4], X##buga); \
+ STORE64(state[ 6], COPY64HI2LO(X##bage)); \
+ STORE64(state[ 7], COPY64HI2LO(X##begi)); \
+ STORE64(state[ 8], COPY64HI2LO(X##bigo)); \
+ STORE64(state[ 9], COPY64HI2LO(X##bogu)); \
+ STORE64(state[10], X##kame); \
+ STORE64(state[11], X##kemi); \
+ STORE64(state[12], X##kimo); \
+ STORE64(state[13], X##komu); \
+ STORE128(state[14], X##kuma); \
+ STORE64(state[16], COPY64HI2LO(X##kame)); \
+ STORE64(state[17], COPY64HI2LO(X##kemi)); \
+ STORE64(state[18], COPY64HI2LO(X##kimo)); \
+ STORE64(state[19], COPY64HI2LO(X##komu)); \
+ STORE128(state[20], X##sase); \
+ STORE128(state[22], X##siso); \
+ STORE64(state[24], X##su); \
+
+/* Copy the 13 paired-lane state variables with prefix Y into those with
+ * prefix X (X##bage = Y##bage, ..., X##su = Y##su).  Only the variables the
+ * round macro reads and writes are copied; the single-lane and
+ * load-time-only variables (X##ba, X##bae, ...) are not touched.
+ *   X  prefix of the destination variables (pasted with ##)
+ *   Y  prefix of the source variables (pasted with ##) */
+#define copyStateVariables(X, Y) \
+ X##bage = Y##bage; \
+ X##begi = Y##begi; \
+ X##bigo = Y##bigo; \
+ X##bogu = Y##bogu; \
+ X##buga = Y##buga; \
+ X##kame = Y##kame; \
+ X##kemi = Y##kemi; \
+ X##kimo = Y##kimo; \
+ X##komu = Y##komu; \
+ X##kuma = Y##kuma; \
+ X##sase = Y##sase; \
+ X##siso = Y##siso; \
+ X##su = Y##su; \
+
diff --git a/Modules/_sha3/keccak/KeccakNISTInterface.c b/Modules/_sha3/keccak/KeccakNISTInterface.c
new file mode 100644
index 0000000000..e94082bc24
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakNISTInterface.c
@@ -0,0 +1,83 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include "KeccakNISTInterface.h"
+#include "KeccakF-1600-interface.h"
+
+static HashReturn Init(hashState *state, int hashbitlen)
+{
+ /* Capacity in bits for the requested digest size; rate = 1600 - capacity. */
+ unsigned int capacityBits;
+
+ if (hashbitlen == 0)
+ capacityBits = 576; /* default parameters, arbitrary-length output */
+ else if (hashbitlen == 224)
+ capacityBits = 448;
+ else if (hashbitlen == 256)
+ capacityBits = 512;
+ else if (hashbitlen == 384)
+ capacityBits = 768;
+ else if (hashbitlen == 512)
+ capacityBits = 1024;
+ else
+ return BAD_HASHLEN; /* unsupported digest size: the state is left untouched */
+
+ /* Every pair above sums to 1600 with a rate that is a multiple of 64. */
+ InitSponge((spongeState*)state, 1600 - capacityBits, capacityBits);
+ /* InitSponge() zeroes fixedOutputLength, so record it afterwards. */
+ state->fixedOutputLength = hashbitlen;
+ return SUCCESS;
+}
+
+static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
+{
+ const unsigned int trailingBits = (unsigned int)(databitlen % 8);
+ HashReturn ret;
+ unsigned char lastByte;
+
+ /* Whole bytes go straight into the sponge. */
+ ret = Absorb((spongeState*)state, data, databitlen - trailingBits);
+ if ((trailingBits == 0) || (ret != SUCCESS))
+ return ret;
+
+ /* The caller supplies the final partial byte in its most significant
+ bits; shift it down because Absorb() reads the least significant ones. */
+ lastByte = data[databitlen/8] >> (8 - trailingBits);
+ return Absorb((spongeState*)state, &lastByte, trailingBits);
+}
+
+static HashReturn Final(hashState *state, BitSequence *hashval)
+{ /* Squeeze exactly the number of bits that Init() stored in fixedOutputLength. */
+ return (HashReturn)Squeeze((spongeState*)state, hashval, state->fixedOutputLength);
+}
+
+/*
+static HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
+{
+ hashState state;
+ HashReturn result;
+
+ if ((hashbitlen != 224) && (hashbitlen != 256) && (hashbitlen != 384) && (hashbitlen != 512))
+ return BAD_HASHLEN; * Only the four fixed output lengths available through this API *
+ result = Init(&state, hashbitlen);
+ if (result != SUCCESS)
+ return result;
+ result = Update(&state, data, databitlen);
+ if (result != SUCCESS)
+ return result;
+ result = Final(&state, hashval);
+ return result;
+}
+*/
+
diff --git a/Modules/_sha3/keccak/KeccakNISTInterface.h b/Modules/_sha3/keccak/KeccakNISTInterface.h
new file mode 100644
index 0000000000..244431b1eb
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakNISTInterface.h
@@ -0,0 +1,72 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakNISTInterface_h_
+#define _KeccakNISTInterface_h_
+
+#include "KeccakSponge.h"
+
+typedef unsigned char BitSequence; /* element type of the message and digest buffers */
+typedef unsigned long long DataLength; /* type of the message length passed to Update(), counted in bits */
+typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; /* status codes returned by Init(), Update() and Final() */
+
+typedef spongeState hashState; /* the NIST-style state is the sponge state itself */
+
+/**
+ * Function to initialize the state of the Keccak[r, c] sponge function.
+ * The rate r and capacity c values are determined from @a hashbitlen.
+ * @param state Pointer to the state of the sponge function to be initialized.
+ * @param hashbitlen The desired number of output bits,
+ * or 0 for Keccak[] with default parameters
+ * and arbitrarily-long output.
+ * @pre The value of hashbitlen must be one of 0, 224, 256, 384 and 512.
+ * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
+ */
+static HashReturn Init(hashState *state, int hashbitlen);
+/**
+ * Function to give input data for the sponge function to absorb.
+ * @param state Pointer to the state of the sponge function initialized by Init().
+ * @param data Pointer to the input data.
+ * When @a databitLen is not a multiple of 8, the last bits of data must be
+ * in the most significant bits of the last byte.
+ * @param databitLen The number of input bits provided in the input data.
+ * @pre In the previous call to Update(), databitLen was a multiple of 8.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
+/**
+ * Function to squeeze output data from the sponge function.
+ * If @a hashbitlen was not 0 in the call to Init(), the number of output bits is equal to @a hashbitlen.
+ * If @a hashbitlen was 0 in the call to Init(), the output bits must be extracted using the Squeeze() function.
+ * @param state Pointer to the state of the sponge function initialized by Init().
+ * @param hashval Pointer to the buffer where to store the output data.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+static HashReturn Final(hashState *state, BitSequence *hashval);
+/**
+ * Function to compute a hash using the Keccak[r, c] sponge function.
+ * The rate r and capacity c values are determined from @a hashbitlen.
+ * @param hashbitlen The desired number of output bits.
+ * @param data Pointer to the input data.
+ * When @a databitLen is not a multiple of 8, the last bits of data must be
+ * in the most significant bits of the last byte.
+ * @param databitLen The number of input bits provided in the input data.
+ * @param hashval Pointer to the buffer where to store the output data.
+ * @pre The value of hashbitlen must be one of 224, 256, 384 and 512.
+ * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
+ */
+/*
+static HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
+*/
+
+#endif
diff --git a/Modules/_sha3/keccak/KeccakSponge.c b/Modules/_sha3/keccak/KeccakSponge.c
new file mode 100644
index 0000000000..1ca6bf0010
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakSponge.c
@@ -0,0 +1,266 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include "KeccakSponge.h"
+#include "KeccakF-1600-interface.h"
+#ifdef KeccakReference
+#include "displayIntermediateValues.h"
+#endif
+
+static int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity)
+{
+ /* Accept only r+c=1600 with 0 < r < 1600 and r a multiple of 64. */
+ const int widthOk = (rate+capacity == 1600);
+ const int rateOk = (rate != 0) && (rate < 1600) && ((rate % 64) == 0);
+
+ if (!widthOk || !rateOk)
+ return 1;
+ KeccakInitialize();
+ KeccakInitializeState(state->state);
+ memset(state->dataQueue, 0, KeccakMaximumRateInBytes);
+ state->rate = rate;
+ state->capacity = capacity;
+ /* Start in the absorbing phase with an empty queue and nothing to squeeze. */
+ state->fixedOutputLength = state->bitsInQueue = state->bitsAvailableForSqueezing = 0;
+ state->squeezing = 0;
+ return 0;
+}
+
+static void AbsorbQueue(spongeState *state)
+{
+ /* Absorb the block held in dataQueue into the state and empty the queue; state->bitsInQueue is assumed to be equal to state->rate */
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", state->dataQueue, state->rate/8);
+ #endif
+#ifdef ProvideFast576
+ if (state->rate == 576)
+ KeccakAbsorb576bits(state->state, state->dataQueue);
+ else /* each enabled ProvideFastNNN test chains through its else to the next one, ending at the generic call */
+#endif
+#ifdef ProvideFast832
+ if (state->rate == 832)
+ KeccakAbsorb832bits(state->state, state->dataQueue);
+ else
+#endif
+#ifdef ProvideFast1024
+ if (state->rate == 1024)
+ KeccakAbsorb1024bits(state->state, state->dataQueue);
+ else
+#endif
+#ifdef ProvideFast1088
+ if (state->rate == 1088)
+ KeccakAbsorb1088bits(state->state, state->dataQueue);
+ else
+#endif
+#ifdef ProvideFast1152
+ if (state->rate == 1152)
+ KeccakAbsorb1152bits(state->state, state->dataQueue);
+ else
+#endif
+#ifdef ProvideFast1344
+ if (state->rate == 1344)
+ KeccakAbsorb1344bits(state->state, state->dataQueue);
+ else
+#endif
+ KeccakAbsorb(state->state, state->dataQueue, state->rate/64); /* generic path: third argument is the block length in units of 64 bits */
+ state->bitsInQueue = 0; /* the queue has been consumed */
+}
+
+static int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen)
+{ /* Absorb databitlen bits of data; returns 0 on success, 1 if a partial byte was already queued or squeezing has begun. */
+ unsigned long long i, j, wholeBlocks;
+ unsigned int partialBlock, partialByte;
+ const unsigned char *curData;
+ /* i counts the input bits consumed so far. */
+ if ((state->bitsInQueue % 8) != 0)
+ return 1; /* Only the last call may contain a partial byte */
+ if (state->squeezing)
+ return 1; /* Too late for additional input */
+
+ i = 0;
+ while(i < databitlen) {
+ if ((state->bitsInQueue == 0) && (databitlen >= state->rate) && (i <= (databitlen-state->rate))) { /* empty queue and at least one full block left: absorb straight from data */
+ wholeBlocks = (databitlen-i)/state->rate;
+ curData = data+i/8;
+#ifdef ProvideFast576
+ if (state->rate == 576) {
+ for(j=0; j<wholeBlocks; j++, curData+=576/8) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curData, state->rate/8);
+ #endif
+ KeccakAbsorb576bits(state->state, curData);
+ }
+ }
+ else
+#endif
+#ifdef ProvideFast832
+ if (state->rate == 832) {
+ for(j=0; j<wholeBlocks; j++, curData+=832/8) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curData, state->rate/8);
+ #endif
+ KeccakAbsorb832bits(state->state, curData);
+ }
+ }
+ else
+#endif
+#ifdef ProvideFast1024
+ if (state->rate == 1024) {
+ for(j=0; j<wholeBlocks; j++, curData+=1024/8) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curData, state->rate/8);
+ #endif
+ KeccakAbsorb1024bits(state->state, curData);
+ }
+ }
+ else
+#endif
+#ifdef ProvideFast1088
+ if (state->rate == 1088) {
+ for(j=0; j<wholeBlocks; j++, curData+=1088/8) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curData, state->rate/8);
+ #endif
+ KeccakAbsorb1088bits(state->state, curData);
+ }
+ }
+ else
+#endif
+#ifdef ProvideFast1152
+ if (state->rate == 1152) {
+ for(j=0; j<wholeBlocks; j++, curData+=1152/8) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curData, state->rate/8);
+ #endif
+ KeccakAbsorb1152bits(state->state, curData);
+ }
+ }
+ else
+#endif
+#ifdef ProvideFast1344
+ if (state->rate == 1344) {
+ for(j=0; j<wholeBlocks; j++, curData+=1344/8) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curData, state->rate/8);
+ #endif
+ KeccakAbsorb1344bits(state->state, curData);
+ }
+ }
+ else
+#endif
+ {
+ for(j=0; j<wholeBlocks; j++, curData+=state->rate/8) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curData, state->rate/8);
+ #endif
+ KeccakAbsorb(state->state, curData, state->rate/64);
+ }
+ }
+ i += wholeBlocks*state->rate;
+ }
+ else {
+ partialBlock = state->rate-state->bitsInQueue; /* room left in the queue; bitsInQueue is always below rate here */
+ if ((databitlen - i) < (unsigned long long)partialBlock) /* clamp in 64 bits: narrowing the remainder first could wrap and let memcpy overrun dataQueue */
+ partialBlock = (unsigned int)(databitlen - i);
+ partialByte = partialBlock % 8;
+ partialBlock -= partialByte;
+ memcpy(state->dataQueue+state->bitsInQueue/8, data+i/8, partialBlock/8);
+ state->bitsInQueue += partialBlock;
+ i += partialBlock;
+ if (state->bitsInQueue == state->rate)
+ AbsorbQueue(state);
+ if (partialByte > 0) { /* trailing bits of the message: keep only the low partialByte bits of the last byte */
+ unsigned char mask = (1 << partialByte)-1;
+ state->dataQueue[state->bitsInQueue/8] = data[i/8] & mask;
+ state->bitsInQueue += partialByte;
+ i += partialByte;
+ }
+ }
+ }
+ return 0;
+}
+
+static void PadAndSwitchToSqueezingPhase(spongeState *state)
+{ /* Pad the queued input (a 1 bit right after the data, a 1 bit at position rate-1), absorb it, extract the first output block and mark the sponge as squeezing. */
+ /* Note: the bits are numbered from 0=LSB to 7=MSB */
+ if (state->bitsInQueue + 1 == state->rate) { /* only one free bit: the first padding bit completes this block, the last one goes into an extra all-zero block */
+ state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8);
+ AbsorbQueue(state);
+ memset(state->dataQueue, 0, state->rate/8);
+ }
+ else {
+ memset(state->dataQueue + (state->bitsInQueue+7)/8, 0, state->rate/8 - (state->bitsInQueue+7)/8); /* clear every byte after the queued data */
+ state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8); /* first padding bit, directly after the data */
+ }
+ state->dataQueue[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8); /* last padding bit, at bit position rate-1 of the block */
+ AbsorbQueue(state);
+
+ #ifdef KeccakReference
+ displayText(1, "--- Switching to squeezing phase ---");
+ #endif
+#ifdef ProvideFast1024
+ if (state->rate == 1024) {
+ KeccakExtract1024bits(state->state, state->dataQueue);
+ state->bitsAvailableForSqueezing = 1024;
+ }
+ else
+#endif
+ {
+ KeccakExtract(state->state, state->dataQueue, state->rate/64); /* dataQueue is reused to hold the extracted output block */
+ state->bitsAvailableForSqueezing = state->rate;
+ }
+ #ifdef KeccakReference
+ displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
+ #endif
+ state->squeezing = 1;
+}
+
+static int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength)
+{ /* Copy outputLength bits of sponge output into output; returns 0 on success, 1 if outputLength is not a multiple of 8. */
+ unsigned long long i;
+ unsigned int partialBlock;
+
+ if (!state->squeezing)
+ PadAndSwitchToSqueezingPhase(state); /* runs before the length check below, so a rejected call still ends the absorbing phase */
+ if ((outputLength % 8) != 0)
+ return 1; /* Only multiple of 8 bits are allowed, truncation can be done at user level */
+
+ i = 0;
+ while(i < outputLength) {
+ if (state->bitsAvailableForSqueezing == 0) { /* current block used up: permute the state and extract a fresh block */
+ KeccakPermutation(state->state);
+#ifdef ProvideFast1024
+ if (state->rate == 1024) {
+ KeccakExtract1024bits(state->state, state->dataQueue);
+ state->bitsAvailableForSqueezing = 1024;
+ }
+ else
+#endif
+ {
+ KeccakExtract(state->state, state->dataQueue, state->rate/64);
+ state->bitsAvailableForSqueezing = state->rate;
+ }
+ #ifdef KeccakReference
+ displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
+ #endif
+ }
+ partialBlock = state->bitsAvailableForSqueezing;
+ if ((unsigned long long)partialBlock > outputLength - i)
+ partialBlock = (unsigned int)(outputLength - i); /* hand out no more than the caller still needs */
+ memcpy(output+i/8, state->dataQueue+(state->rate-state->bitsAvailableForSqueezing)/8, partialBlock/8); /* unread output starts rate - bitsAvailableForSqueezing bits into dataQueue */
+ state->bitsAvailableForSqueezing -= partialBlock;
+ i += partialBlock;
+ }
+ return 0;
+}
diff --git a/Modules/_sha3/keccak/KeccakSponge.h b/Modules/_sha3/keccak/KeccakSponge.h
new file mode 100644
index 0000000000..a545cacb30
--- /dev/null
+++ b/Modules/_sha3/keccak/KeccakSponge.h
@@ -0,0 +1,76 @@
+/*
+The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+Michaël Peeters and Gilles Van Assche. For more information, feedback or
+questions, please refer to our website: http://keccak.noekeon.org/
+
+Implementation by the designers,
+hereby denoted as "the implementer".
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakSponge_h_
+#define _KeccakSponge_h_
+
+#define KeccakPermutationSize 1600
+#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8)
+#define KeccakMaximumRate 1536
+#define KeccakMaximumRateInBytes (KeccakMaximumRate/8)
+
+#if defined(__GNUC__)
+#define ALIGN __attribute__ ((aligned(32)))
+#elif defined(_MSC_VER)
+#define ALIGN __declspec(align(32))
+#else
+#define ALIGN
+#endif
+
+ALIGN typedef struct spongeStateStruct { /* complete state of one Keccak sponge instance */
+ ALIGN unsigned char state[KeccakPermutationSizeInBytes]; /* permutation state, KeccakPermutationSize/8 bytes */
+ ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes]; /* block buffer: pending input while absorbing, extracted output while squeezing */
+ unsigned int rate; /* rate r, in bits */
+ unsigned int capacity; /* capacity c, in bits */
+ unsigned int bitsInQueue; /* number of input bits currently held in dataQueue */
+ unsigned int fixedOutputLength; /* digest length in bits used by the NIST-style API; InitSponge() resets it to 0 */
+ int squeezing; /* nonzero once the sponge has switched to the squeezing phase */
+ unsigned int bitsAvailableForSqueezing; /* output bits in dataQueue not yet copied to the caller */
+} spongeState;
+
+/**
+ * Function to initialize the state of the Keccak[r, c] sponge function.
+ * The sponge function is set to the absorbing phase.
+ * @param state Pointer to the state of the sponge function to be initialized.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @pre One must have r+c=1600 and the rate a multiple of 64 bits in this implementation.
+ * @return Zero if successful, 1 otherwise.
+ */
+static int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity);
+/**
+ * Function to give input data for the sponge function to absorb.
+ * @param state Pointer to the state of the sponge function initialized by InitSponge().
+ * @param data Pointer to the input data.
+ * When @a databitLen is not a multiple of 8, the last bits of data must be
+ * in the least significant bits of the last byte.
+ * @param databitLen The number of input bits provided in the input data.
+ * @pre In the previous call to Absorb(), databitLen was a multiple of 8.
+ * @pre The sponge function must be in the absorbing phase,
+ * i.e., Squeeze() must not have been called before.
+ * @return Zero if successful, 1 otherwise.
+ */
+static int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen);
+/**
+ * Function to squeeze output data from the sponge function.
+ * If the sponge function was in the absorbing phase, this function
+ * switches it to the squeezing phase.
+ * @param state Pointer to the state of the sponge function initialized by InitSponge().
+ * @param output Pointer to the buffer where to store the output data.
+ * @param outputLength The number of output bits desired.
+ * It must be a multiple of 8.
+ * @return Zero if successful, 1 otherwise.
+ */
+static int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength);
+
+#endif
diff --git a/Modules/_sha3/keccak/brg_endian.h b/Modules/_sha3/keccak/brg_endian.h
new file mode 100755
index 0000000000..7226eb3bec
--- /dev/null
+++ b/Modules/_sha3/keccak/brg_endian.h
@@ -0,0 +1,142 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+ 1. source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ 2. binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation;
+
+ 3. the name of the copyright holder is not used to endorse products
+ built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+ Changes for ARM 9/9/2010
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+
+#if 0
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+# include <sys/isa_defs.h>
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+# include <sys/endian.h>
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+ defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+# include <machine/endian.h>
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+# if !defined( __MINGW32__ ) && !defined( _AIX )
+# include <endian.h>
+# if !defined( __BEOS__ )
+# include <byteswap.h>
+# endif
+# endif
+#endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
+/* seem to encompass most endian symbol definitions */
+
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( _BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* if the platform byte order could not be determined, then try to */
+/* set this define using common machine defines */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
+ defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
+ defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
+ defined( vax ) || defined( vms ) || defined( VMS ) || \
+ defined( __VMS ) || defined( _M_X64 )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
+ defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
+ defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
+ defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
+ defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
+ defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
+ defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif defined(__arm__)
+# ifdef __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# else
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif 1 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
diff --git a/Modules/_sha3/keccak/crypto_hash.h b/Modules/_sha3/keccak/crypto_hash.h
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/Modules/_sha3/keccak/crypto_hash.h