diff options
Diffstat (limited to 'lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600')
11 files changed, 2060 insertions, 0 deletions
diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-64.macros b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-64.macros new file mode 100644 index 000000000..c4dee90a3 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-64.macros @@ -0,0 +1,754 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + uint64_t Aba, Abe, Abi, Abo, Abu; \ + uint64_t Aga, Age, Agi, Ago, Agu; \ + uint64_t Aka, Ake, Aki, Ako, Aku; \ + uint64_t Ama, Ame, Ami, Amo, Amu; \ + uint64_t Asa, Ase, Asi, Aso, Asu; \ + uint64_t Bba, Bbe, Bbi, Bbo, Bbu; \ + uint64_t Bga, Bge, Bgi, Bgo, Bgu; \ + uint64_t Bka, Bke, Bki, Bko, Bku; \ + uint64_t Bma, Bme, Bmi, Bmo, Bmu; \ + uint64_t Bsa, Bse, Bsi, Bso, Bsu; \ + uint64_t Ca, Ce, Ci, Co, Cu; \ + uint64_t Da, De, Di, Do, Du; \ + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; \ + uint64_t Ega, Ege, Egi, Ego, Egu; \ + uint64_t Eka, Eke, Eki, Eko, Eku; \ + uint64_t Ema, Eme, Emi, Emo, Emu; \ + uint64_t Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = Aba^Aga^Aka^Ama^Asa; \ + Ce = Abe^Age^Ake^Ame^Ase; \ + Ci = Abi^Agi^Aki^Ami^Asi; \ + Co = Abo^Ago^Ako^Amo^Aso; \ + Cu = Abu^Agu^Aku^Amu^Asu; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^( Bbo & Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^( Bbu | Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^( Bba & Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^( Bgi & Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + Ci ^= E##gi; \ + E##go = Bgo ^( Bgu | Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^( Bga & Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^( Bki & Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = (~Bko)^( Bku | Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^( Bka & Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^( Bmi | Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + Ci ^= E##mi; \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^( Bma | Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = (~Bse)^( Bsi | Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^( Bso & Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^( Bsu | Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^( Bsa & Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + E##bi = Bbi ^( Bbo & Bbu ); \ + E##bo = Bbo ^( Bbu | Bba ); \ + E##bu = Bbu ^( Bba & Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + E##ge = Bge ^( Bgi & Bgo ); \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + E##go = Bgo ^( Bgu | Bga ); \ + E##gu = Bgu ^( Bga & Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + E##ke = Bke ^( Bki & Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = (~Bko)^( Bku | Bka ); \ + E##ku = Bku ^( Bka & Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + E##me = Bme ^( Bmi | Bmo ); \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + E##mu = Bmu ^( Bma | Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = (~Bse)^( Bsi | Bso ); \ + E##si = Bsi ^( Bso & Bsu ); \ + E##so = Bso ^( Bsu | Bsa ); \ + E##su = Bsu ^( Bsa & Bse ); \ +\ + +#else /* UseBebigokimisa */ +/* --- Code for round, with prepare-theta */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^((~Bba)& Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + Ci ^= E##gi; \ + E##go = Bgo ^((~Bgu)& Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^((~Bga)& Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^((~Bki)& Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = Bko ^((~Bku)& Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^((~Bka)& Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^((~Bmi)& Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + Ci ^= E##mi; \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^((~Bma)& Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = Bse ^((~Bsi)& Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^((~Bso)& Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^((~Bsu)& Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^((~Bsa)& Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + E##bu = Bbu ^((~Bba)& Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + E##go = Bgo ^((~Bgu)& Bga ); \ + E##gu = Bgu ^((~Bga)& Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + E##ke = Bke ^((~Bki)& Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = Bko ^((~Bku)& Bka ); \ + E##ku = Bku ^((~Bka)& Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + E##me = Bme ^((~Bmi)& Bmo ); \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + E##mu = Bmu ^((~Bma)& Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = Bse ^((~Bsi)& Bso ); \ + E##si = Bsi ^((~Bso)& Bsu ); \ + E##so = Bso ^((~Bsu)& Bsa ); \ + E##su = Bsu ^((~Bsa)& Bse ); \ +\ + +#endif /* UseBebigokimisa */ + +#define copyFromState(X, state) \ + X##ba = state[ 0]; \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba; \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define LOAD64(x) ((x)[0] \ + | (uint64_t) (x)[1] << 0x08 \ + | (uint64_t) (x)[2] << 0x10 \ + | (uint64_t) (x)[3] << 0x18 \ + | (uint64_t) (x)[4] << 0x20 \ + | (uint64_t) (x)[5] << 0x28 \ + | (uint64_t) (x)[6] << 0x30 \ + | (uint64_t) (x)[7] << 0x38 ) +#else +#define LOAD64(x) ((x)[7] \ + | (uint64_t) (x)[6] << 0x08 \ + | (uint64_t) (x)[5] << 0x10 \ + | (uint64_t) (x)[4] << 0x18 \ + | (uint64_t) (x)[3] << 0x20 \ + | (uint64_t) (x)[2] << 0x28 \ + | (uint64_t) (x)[1] << 0x30 \ + | (uint64_t) (x)[0] << 0x38 ) +#endif + +#define addInput(X, input, laneCount) \ + if (laneCount == 21) { \ + X##ba ^= LOAD64(input + 0x00); \ + X##be ^= LOAD64(input + 0x08); \ + X##bi ^= LOAD64(input + 0x10); \ + X##bo ^= LOAD64(input + 0x18); \ + X##bu ^= LOAD64(input + 0x20); \ + X##ga ^= LOAD64(input + 0x28); \ + X##ge ^= LOAD64(input + 0x30); \ + X##gi ^= LOAD64(input + 0x38); \ + X##go ^= LOAD64(input + 0x40); \ + X##gu ^= LOAD64(input + 0x48); \ + X##ka ^= LOAD64(input + 0x50); \ + X##ke ^= LOAD64(input + 0x58); \ + X##ki ^= LOAD64(input + 0x60); \ + X##ko ^= LOAD64(input + 0x68); \ + X##ku ^= LOAD64(input + 0x70); \ + X##ma ^= LOAD64(input + 0x78); \ + X##me ^= LOAD64(input + 0x80); \ + X##mi ^= LOAD64(input + 0x88); \ + X##mo ^= LOAD64(input + 0x90); \ + X##mu ^= LOAD64(input + 0x98); \ + X##sa ^= LOAD64(input + 0xA0); \ + } \ + else if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + } \ + else { \ + X##ba ^= LOAD64(input + 0x00); \ + } \ + } \ + else { \ + X##ba ^= LOAD64(input + 0x00); \ + X##be ^= LOAD64(input + 0x08); \ + if (laneCount < 3) { \ + } \ + else { \ + X##bi ^= LOAD64(input + 0x10); \ + } \ + } \ + } \ + else { \ + X##ba ^= LOAD64(input + 0x00); \ + X##be ^= LOAD64(input + 0x08); \ + X##bi ^= LOAD64(input + 0x10); \ + X##bo ^= LOAD64(input + 0x18); \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + } \ + else { \ + X##bu ^= LOAD64(input + 0x20); \ + } \ + } \ + else { \ + X##bu ^= LOAD64(input + 0x20); \ + X##ga ^= LOAD64(input + 0x28); \ + if (laneCount < 7) { \ + } \ + else { \ + X##ge ^= LOAD64(input + 0x30); \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= LOAD64(input + 0x00); \ + X##be ^= LOAD64(input + 0x08); \ + X##bi ^= LOAD64(input + 0x10); \ + X##bo ^= LOAD64(input + 0x18); \ + X##bu ^= LOAD64(input + 0x20); \ + X##ga ^= LOAD64(input + 0x28); \ + X##ge ^= LOAD64(input + 0x30); \ + X##gi ^= LOAD64(input + 0x38); \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + } \ + else { \ + X##go ^= LOAD64(input + 0x40); \ + } \ + } \ + else { \ + X##go ^= LOAD64(input + 0x40); \ + X##gu ^= LOAD64(input + 0x48); \ + if (laneCount < 11) { \ + } \ + else { \ + X##ka ^= LOAD64(input + 0x50); \ + } \ + } \ + } \ + else { \ + X##go ^= LOAD64(input + 0x40); \ + X##gu ^= LOAD64(input + 0x48); \ + X##ka ^= LOAD64(input + 0x50); \ + X##ke ^= LOAD64(input + 0x58); \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + } \ + else { \ + X##ki ^= LOAD64(input + 0x60); \ + } \ + } \ + else { \ + X##ki ^= LOAD64(input + 0x60); \ + X##ko ^= LOAD64(input + 0x68); \ + if (laneCount < 15) { \ + } \ + else { \ + X##ku ^= LOAD64(input + 0x70); \ + } \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= LOAD64(input + 0x00); \ + X##be ^= LOAD64(input + 0x08); \ + X##bi ^= LOAD64(input + 0x10); \ + X##bo ^= LOAD64(input + 0x18); \ + X##bu ^= LOAD64(input + 0x20); \ + X##ga ^= LOAD64(input + 0x28); \ + X##ge ^= LOAD64(input + 0x30); \ + X##gi ^= LOAD64(input + 0x38); \ + X##go ^= LOAD64(input + 0x40); \ + X##gu ^= LOAD64(input + 0x48); \ + X##ka ^= LOAD64(input + 0x50); \ + X##ke ^= LOAD64(input + 0x58); \ + X##ki ^= LOAD64(input + 0x60); \ + X##ko ^= LOAD64(input + 0x68); \ + X##ku ^= LOAD64(input + 0x70); \ + X##ma ^= LOAD64(input + 0x78); \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + } \ + else { \ + X##me ^= LOAD64(input + 0x80); \ + } \ + } \ + else { \ + X##me ^= LOAD64(input + 0x80); \ + X##mi ^= LOAD64(input + 0x88); \ + if (laneCount < 19) { \ + } \ + else { \ + X##mo ^= LOAD64(input + 0x90); \ + } \ + } \ + } \ + else { \ + X##me ^= LOAD64(input + 0x80); \ + X##mi ^= LOAD64(input + 0x88); \ + X##mo ^= LOAD64(input + 0x90); \ + X##mu ^= LOAD64(input + 0x98); \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + } \ + else { \ + X##sa ^= LOAD64(input + 0xA0); \ + } \ + } \ + else { \ + X##sa ^= LOAD64(input + 0xA0); \ + X##se ^= LOAD64(input + 0xA8); \ + if (laneCount < 23) { \ + } \ + else { \ + X##si ^= LOAD64(input + 0xA8); \ + } \ + } \ + } \ + } \ + else { \ + X##me ^= LOAD64(input + 0x80); \ + X##mi ^= LOAD64(input + 0x88); \ + X##mo ^= LOAD64(input + 0x90); \ + X##mu ^= LOAD64(input + 0x98); \ + X##sa ^= LOAD64(input + 0xA0); \ + X##se ^= LOAD64(input + 0xA8); \ + X##si ^= LOAD64(input + 0xB0); \ + X##so ^= LOAD64(input + 0xB8); \ + if (laneCount < 25) { \ + } \ + else { \ + X##su ^= LOAD64(input + 0xC0); \ + } \ + } \ + } diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h new file mode 100644 index 000000000..078fbc36a --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h @@ -0,0 +1,86 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +Please refer to SnP-documentation.h for more details. +*/ + +#ifndef _KeccakP_1600_SnP_h_ +#define _KeccakP_1600_SnP_h_ + +#include "brg_endian.h" +#include "KeccakP-1600-opt64-config.h" + +#include <stddef.h> + +#define KeccakP1600_implementation_plain64 "generic 64-bit optimized implementation (" KeccakP1600_implementation_config ")" +#define KeccakP1600_stateSizeInBytes_plain64 200 +#define KeccakP1600_stateAlignment_plain64 8 +#define KeccakF1600_FastLoop_supported_plain64 +#define KeccakP1600_12rounds_FastLoop_supported_plain64 + +#if defined(ADD_SYMBOL_SUFFIX) +#define KECCAK_SYMBOL_SUFFIX plain64 +#define KECCAK_IMPL_NAMESPACE(x) x##_plain64 +#else +#define KECCAK_IMPL_NAMESPACE(x) x +#define KeccakP1600_implementation KeccakP1600_implementation_plain64 +#define KeccakP1600_stateSizeInBytes KeccakP1600_stateSizeInBytes_plain64 +#define KeccakP1600_stateAlignment KeccakP1600_stateAlignment_plain64 +#define KeccakF1600_FastLoop_supported KeccakF1600_FastLoop_supported_plain64 +#define KeccakP1600_12rounds_FastLoop_supported KeccakP1600_12rounds_FastLoop_supported_plain64 +#endif + +#define KeccakP1600_StaticInitialize KECCAK_IMPL_NAMESPACE(KeccakP1600_StaticInitialize) +void KeccakP1600_StaticInitialize(void); + +#define KeccakP1600_Initialize KECCAK_IMPL_NAMESPACE(KeccakP1600_Initialize) +void KeccakP1600_Initialize(void *state); + +#define KeccakP1600_AddByte KECCAK_IMPL_NAMESPACE(KeccakP1600_AddByte) +void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); + +#define KeccakP1600_AddBytes KECCAK_IMPL_NAMESPACE(KeccakP1600_AddBytes) +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); + +#define KeccakP1600_OverwriteBytes KECCAK_IMPL_NAMESPACE(KeccakP1600_OverwriteBytes) +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); + +#define KeccakP1600_OverwriteWithZeroes KECCAK_IMPL_NAMESPACE(KeccakP1600_OverwriteWithZeroes) +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); + +#define KeccakP1600_Permute_Nrounds KECCAK_IMPL_NAMESPACE(KeccakP1600_Permute_Nrounds) +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); + +#define KeccakP1600_Permute_12rounds KECCAK_IMPL_NAMESPACE(KeccakP1600_Permute_12rounds) +void KeccakP1600_Permute_12rounds(void *state); + +#define KeccakP1600_Permute_24rounds KECCAK_IMPL_NAMESPACE(KeccakP1600_Permute_24rounds) +void KeccakP1600_Permute_24rounds(void *state); + +#define KeccakP1600_ExtractBytes KECCAK_IMPL_NAMESPACE(KeccakP1600_ExtractBytes) +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); + +#define KeccakP1600_ExtractAndAddBytes KECCAK_IMPL_NAMESPACE(KeccakP1600_ExtractAndAddBytes) +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); + +#define KeccakF1600_FastLoop_Absorb KECCAK_IMPL_NAMESPACE(KeccakF1600_FastLoop_Absorb) +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); + +#define KeccakP1600_12rounds_FastLoop_Absorb KECCAK_IMPL_NAMESPACE(KeccakP1600_12rounds_FastLoop_Absorb) +size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); + +#endif diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64-config.h b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64-config.h new file mode 100644 index 000000000..085b6c958 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64-config.h @@ -0,0 +1,6 @@ +/* +This file defines some parameters of the implementation in the parent directory. +*/ + +#define KeccakP1600_implementation_config "all rounds unrolled" +#define KeccakP1600_fullUnrolling diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c new file mode 100644 index 000000000..d813a3679 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c @@ -0,0 +1,519 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file implements Keccak-p[1600] in a SnP-compatible way. +Please refer to SnP-documentation.h for more details. + +This implementation comes with KeccakP-1600-SnP.h in the same folder. +Please refer to LowLevel.build for the exact list of other files it must be combined with. +*/ + +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include "brg_endian.h" +#include "KeccakP-1600-SnP.h" +#include "KeccakP-1600-opt64-config.h" + +#define UseBebigokimisa + +#if defined(_MSC_VER) +#define ROL64(a, offset) _rotl64(a, offset) +#elif defined(KeccakP1600_useSHLD) +#define ROL64(x,N) ({ \ + register uint64_t __out; \ + register uint64_t __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) +#else +#define ROL64(a, offset) ((((uint64_t)a) << offset) ^ (((uint64_t)a) >> (64-offset))) +#endif + +#include "KeccakP-1600-64.macros" +#define FullUnrolling +#include "KeccakP-1600-unrolling.macros" +#include "SnP-Relaned.h" + +static const uint64_t KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL +}; + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_StaticInitialize(void) { } + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) { + memset(state, 0, 200); + ((uint64_t *)state)[ 1] = ~(uint64_t)0; + ((uint64_t *)state)[ 2] = ~(uint64_t)0; + ((uint64_t *)state)[ 8] = ~(uint64_t)0; + ((uint64_t *)state)[12] = ~(uint64_t)0; + ((uint64_t *)state)[17] = ~(uint64_t)0; + ((uint64_t *)state)[20] = ~(uint64_t)0; +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + uint64_t lane; + if (length == 0) { + return; + } + if (length == 1) { + lane = data[0]; + } else { + lane = 0; + memcpy(&lane, data, length); + } + lane <<= offset * 8; +#else + uint64_t lane = 0; + unsigned int i; + for (i = 0; i < length; i++) { + lane |= ((uint64_t)data[i]) << ((i + offset) * 8); + } +#endif + ((uint64_t *)state)[lanePosition] ^= lane; +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + unsigned int i = 0; + /* If either pointer is misaligned, fall back to byte-wise xor. */ + if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) { + for (i = 0; i < laneCount * 8; i++) { + ((unsigned char *)state)[i] ^= data[i]; + } + } else { + /* Otherwise... */ + for ( ; (i + 8) <= laneCount; i += 8) { + ((uint64_t *)state)[i + 0] ^= ((const uint64_t *)data)[i + 0]; + ((uint64_t *)state)[i + 1] ^= ((const uint64_t *)data)[i + 1]; + ((uint64_t *)state)[i + 2] ^= ((const uint64_t *)data)[i + 2]; + ((uint64_t *)state)[i + 3] ^= ((const uint64_t *)data)[i + 3]; + ((uint64_t *)state)[i + 4] ^= ((const uint64_t *)data)[i + 4]; + ((uint64_t *)state)[i + 5] ^= ((const uint64_t *)data)[i + 5]; + ((uint64_t *)state)[i + 6] ^= ((const uint64_t *)data)[i + 6]; + ((uint64_t *)state)[i + 7] ^= ((const uint64_t *)data)[i + 7]; + } + for ( ; (i + 4) <= laneCount; i += 4) { + ((uint64_t *)state)[i + 0] ^= ((const uint64_t *)data)[i + 0]; + ((uint64_t *)state)[i + 1] ^= ((const uint64_t *)data)[i + 1]; + ((uint64_t *)state)[i + 2] ^= ((const uint64_t *)data)[i + 2]; + ((uint64_t *)state)[i + 3] ^= ((const uint64_t *)data)[i + 3]; + } + for ( ; (i + 2) <= laneCount; i += 2) { + ((uint64_t *)state)[i + 0] ^= ((const uint64_t *)data)[i + 0]; + ((uint64_t *)state)[i + 1] ^= ((const uint64_t *)data)[i + 1]; + } + if (i < laneCount) { + ((uint64_t *)state)[i + 0] ^= ((const uint64_t *)data)[i + 0]; + } + } +#else + unsigned int i; + const uint8_t *curData = data; + for (i = 0; i < laneCount; i++, curData += 8) { + uint64_t lane = (uint64_t)curData[0] + | ((uint64_t)curData[1] << 8) + | ((uint64_t)curData[2] << 16) + | ((uint64_t)curData[3] << 24) + | ((uint64_t)curData[4] << 32) + | ((uint64_t)curData[5] << 40) + | ((uint64_t)curData[6] << 48) + | ((uint64_t)curData[7] << 56); + ((uint64_t *)state)[i] ^= lane; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) { + ((unsigned char *)state)[offset] ^= byte; +} +#else +void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) { + uint64_t lane = byte; + lane <<= (offset % 8) * 8; + ((uint64_t *)state)[offset / 8] ^= lane; +} +#endif + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) { + SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + unsigned int i; + for (i = 0; i < length; i++) { + ((unsigned char *)state)[lanePosition * 8 + offset + i] = ~data[i]; + } + } else { + memcpy((unsigned char *)state + lanePosition * 8 + offset, data, length); + } +#else + uint64_t lane = ((uint64_t *)state)[lanePosition]; + unsigned int i; + for (i = 0; i < length; i++) { + lane &= ~((uint64_t)0xFF << ((offset + i) * 8)); + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + lane |= (uint64_t)(data[i] ^ 0xFF) << ((offset + i) * 8); + } else { + lane |= (uint64_t)data[i] << ((offset + i) * 8); + } + } + ((uint64_t *)state)[lanePosition] = lane; +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + unsigned int lanePosition; + + for (lanePosition = 0; lanePosition < laneCount; lanePosition++) { + memcpy(((uint64_t *)state) + lanePosition, data, sizeof(uint64_t)); + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + ((uint64_t *)state)[lanePosition] = ~((uint64_t *)state)[lanePosition]; + } + } +#else + unsigned int lanePosition; + const uint8_t *curData = data; + for (lanePosition = 0; lanePosition < laneCount; lanePosition++, curData += 8) { + uint64_t lane = (uint64_t)curData[0] + | ((uint64_t)curData[1] << 8) + | ((uint64_t)curData[2] << 16) + | ((uint64_t)curData[3] << 24) + | ((uint64_t)curData[4] << 32) + | ((uint64_t)curData[5] << 40) + | ((uint64_t)curData[6] << 48) + | ((uint64_t)curData[7] << 56); + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + ((uint64_t *)state)[lanePosition] = ~lane; + } else { + ((uint64_t *)state)[lanePosition] = lane; + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) { + SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + unsigned int lanePosition; + + for (lanePosition = 0; lanePosition < byteCount / 8; lanePosition++) + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + ((uint64_t *)state)[lanePosition] = ~(uint64_t)0; + } else { + ((uint64_t *)state)[lanePosition] = 0; + } + if (byteCount % 8 != 0) { + lanePosition = byteCount / 8; + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + memset((unsigned char *)state + lanePosition * 8, 0xFF, byteCount % 8); + } else { + memset((unsigned char *)state + lanePosition * 8, 0, byteCount % 8); + } + } +#else + unsigned int i, j; + for (i = 0; i < byteCount; i += 8) { + unsigned int lanePosition = i / 8; + if (i + 8 <= byteCount) { + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + ((uint64_t *)state)[lanePosition] = ~(uint64_t)0; + } else { + ((uint64_t *)state)[lanePosition] = 0; + } + } else { + uint64_t lane = ((uint64_t *)state)[lanePosition]; + for (j = 0; j < byteCount % 8; j++) { + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + lane |= (uint64_t)0xFF << (j * 8); + } else { + lane &= ~((uint64_t)0xFF << (j * 8)); + } + } + ((uint64_t *)state)[lanePosition] = lane; + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nr) { + declareABCDE + unsigned int i; + uint64_t *stateAsLanes = (uint64_t *)state; + + copyFromState(A, stateAsLanes) + roundsN(nr) + copyToState(stateAsLanes, A) + +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_24rounds(void *state) { + declareABCDE + uint64_t *stateAsLanes = (uint64_t *)state; + + copyFromState(A, stateAsLanes) + rounds24 + copyToState(stateAsLanes, A) +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_12rounds(void *state) { + declareABCDE + uint64_t *stateAsLanes = (uint64_t *)state; + + copyFromState(A, stateAsLanes) + rounds12 + copyToState(stateAsLanes, A) +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length) { + uint64_t lane = ((const uint64_t *)state)[lanePosition]; + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + lane = ~lane; + } +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + uint64_t lane1[1]; + lane1[0] = lane; + memcpy(data, (uint8_t *)lane1 + offset, length); + } +#else + unsigned int i; + lane >>= offset * 8; + for (i = 0; i < length; i++) { + data[i] = lane & 0xFF; + lane >>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +static void fromWordToBytes(uint8_t *bytes, const uint64_t word) { + unsigned int i; + + for (i = 0; i < (64 / 8); i++) { + bytes[i] = (word >> (8 * i)) & 0xFF; + } +} +#endif + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount * 8); +#else + for (unsigned int i = 0; i < laneCount; i++) { + fromWordToBytes(data + (i * 8), ((const uint64_t *)state)[i]); + } +#endif +#define COMPL(lane) for(unsigned int i=0; i<8; i++) data[8*lane+i] = ~data[8*lane+i] + if (laneCount > 1) { + COMPL(1); + if (laneCount > 2) { + COMPL(2); + if (laneCount > 8) { + COMPL(8); + if (laneCount > 12) { + COMPL(12); + if (laneCount > 17) { + COMPL(17); + if (laneCount > 20) { + COMPL(20); + } + } + } + } + } + } +#undef COMPL +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) { + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) { + uint64_t lane = ((const uint64_t *)state)[lanePosition]; + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + lane = ~lane; + } +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + unsigned int i; + for (i = 0; i < length; i++) { + output[i] = input[i] ^ ((uint8_t *)&lane)[offset + i]; + } + } +#else + unsigned int i; + lane >>= offset * 8; + for (i = 0; i < length; i++) { + output[i] = input[i] ^ (lane & 0xFF); + lane >>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) { + unsigned int i; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + unsigned char temp[8]; + unsigned int j; +#else + uint64_t lane; +#endif + + for (i = 0; i < laneCount; i++) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(&lane, input + 8 * i, sizeof(uint64_t)); + lane ^= ((const uint64_t *)state)[i]; + memcpy(output + 8 * i, &lane, sizeof(uint64_t)); +#else + fromWordToBytes(temp, ((const uint64_t *)state)[i]); + for (j = 0; j < 8; j++) { + output[i * 8 + j] = input[i * 8 + j] ^ temp[j]; + } +#endif + } +#define COMPL(lane) for(i=0; i<8; i++) output[8*lane+i] = ~output[8*lane+i] + if (laneCount > 1) { + COMPL(1); + if (laneCount > 2) { + COMPL(2); + if (laneCount > 8) { + COMPL(8); + if (laneCount > 12) { + COMPL(12); + if (laneCount > 17) { + COMPL(17); + if (laneCount > 20) { + COMPL(20); + } + } + } + } + } + } +#undef COMPL +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) { + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) { + size_t originalDataByteLen = dataByteLen; + declareABCDE + uint64_t *stateAsLanes = (uint64_t *)state; + + copyFromState(A, stateAsLanes) + while (dataByteLen >= laneCount * 8) { + addInput(A, data, laneCount) + rounds24 + data += laneCount * 8; + dataByteLen -= laneCount * 8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) { + size_t originalDataByteLen = dataByteLen; + declareABCDE + uint64_t *stateAsLanes = (uint64_t *)state; + + copyFromState(A, stateAsLanes) + while (dataByteLen >= laneCount * 8) { + addInput(A, data, laneCount) + rounds12 + data += laneCount * 8; + dataByteLen -= laneCount * 8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-unrolling.macros b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-unrolling.macros new file mode 100644 index 000000000..9f7200226 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/KeccakP-1600-unrolling.macros @@ -0,0 +1,305 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#if (defined(FullUnrolling)) +#define rounds24 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(11, E, A) \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 12) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=12) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 6) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 4) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 3) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 2) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 1) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#else +#error "Unrolling is not correctly specified!" +#endif + +#define roundsN(__nrounds) \ + prepareTheta \ + i = 24 - (__nrounds); \ + if ((i&1) != 0) { \ + thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + copyStateVariables(A, E) \ + ++i; \ + } \ + for( /* empty */; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/Makefile b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/Makefile new file mode 100644 index 000000000..fe090f3ff --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/Makefile @@ -0,0 +1,49 @@ +#! gmake +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +####################################################################### +# (1) Include initial platform-independent assignments (MANDATORY). # +####################################################################### + +include manifest.mn + +####################################################################### +# (2) Include "global" configuration information. (OPTIONAL) # +####################################################################### + +USE_GCOV = +include $(CORE_DEPTH)/coreconf/config.mk + +####################################################################### +# (3) Include "component" configuration information. (OPTIONAL) # +####################################################################### + + + +####################################################################### +# (4) Include "local" platform-dependent assignments (OPTIONAL). # +####################################################################### + +include config.mk + +####################################################################### +# (5) Execute "global" rules. (OPTIONAL) # +####################################################################### + +include $(CORE_DEPTH)/coreconf/rules.mk + +####################################################################### +# (6) Execute "component" rules. (OPTIONAL) # +####################################################################### + + + +####################################################################### +# (7) Execute "local" rules. (OPTIONAL). # +####################################################################### + +WARNING_CFLAGS = $(NULL) + diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/SnP-Relaned.h b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/SnP-Relaned.h new file mode 100644 index 000000000..631fb5ae2 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/SnP-Relaned.h @@ -0,0 +1,141 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file contains macros that help implement a permutation in a SnP-compatible way. +It converts an implementation that implement state input/output functions +in a lane-oriented fashion (i.e., using SnP_AddLanes() and SnP_AddBytesInLane, +and similarly for Overwite, Extract and ExtractAndAdd) to the byte-oriented SnP. +Please refer to SnP-documentation.h for more details. +*/ + +#ifndef _SnP_Relaned_h_ +#define _SnP_Relaned_h_ + +#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_AddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_OverwriteBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractAndAddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curInput = (input); \ + unsigned char *_curOutput = (output); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curInput += _bytesInLane; \ + _curOutput += _bytesInLane; \ + } \ + } \ + } + +#endif diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/brg_endian.h b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/brg_endian.h new file mode 100644 index 000000000..efb408ff0 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/brg_endian.h @@ -0,0 +1,121 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#endif + +#endif diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/config.mk b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/config.mk new file mode 100644 index 000000000..b28c9ce64 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/config.mk @@ -0,0 +1,17 @@ +# DO NOT EDIT: generated from config.mk.subdirs.template +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# add fixes for platform integration issues here. +# +# liboqs programs expect the public include files to be in oqs/xxxx, +# So we put liboqs in it's own module, oqs, and point to the dist files +INCLUDES += -I$(CORE_DEPTH)/lib/liboqs/src/common/pqclean_shims -I$(CORE_DEPTH)/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits +DEFINES += + +ifeq ($(OS_ARCH), Darwin) +DEFINES += -DOQS_HAVE_ALIGNED_ALLOC -DOQS_HAVE_MEMALIGN -DOQS_HAVE_POSIX_MEMALIGN +endif + diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/manifest.mn b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/manifest.mn new file mode 100644 index 000000000..31ee9a8c9 --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/manifest.mn @@ -0,0 +1,23 @@ +# DO NOT EDIT: generated from manifest.mn.subdirs.template +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +CORE_DEPTH = ../../../../../../../.. + +MODULE = oqs + +LIBRARY_NAME = oqs_src_common_sha3_xkcp_low_KeccakP-1600_plain-64bits +SHARED_LIBRARY = $(NULL) + +CSRCS = \ + KeccakP-1600-opt64.c \ + $(NULL) + +# only add module debugging in opt builds if DEBUG_PKCS11 is set +ifdef DEBUG_PKCS11 + DEFINES += -DDEBUG_MODULE +endif + +# This part of the code, including all sub-dirs, can be optimized for size +export ALLOW_OPT_CODE_SIZE = 1 diff --git a/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/plain-64bits.gyp b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/plain-64bits.gyp new file mode 100644 index 000000000..aa4fdfa5c --- /dev/null +++ b/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits/plain-64bits.gyp @@ -0,0 +1,39 @@ +# DO NOT EDIT: generated from subdir.gyp.template +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +{ + 'includes': [ + '../../../../../../../../coreconf/config.gypi' + ], + 'targets': [ + { + 'target_name': 'oqs_src_common_sha3_xkcp_low_KeccakP-1600_plain-64bits', + 'type': 'static_library', + 'sources': [ + 'KeccakP-1600-opt64.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ] + } + ], + 'target_defaults': { + 'defines': [ + ], + 'include_dirs': [ + '<(DEPTH)/lib/liboqs/src/common/pqclean_shims', + '<(DEPTH)/lib/liboqs/src/common/sha3/xkcp_low/KeccakP-1600/plain-64bits', + ], + [ 'OS=="mac"', { + 'defines': [ + 'OQS_HAVE_POSIX_MEMALIGN', + 'OQS_HAVE_ALIGNED_ALLOC', + 'OQS_HAVE_MEMALIGN' + ] + }] + }, + 'variables': { + 'module': 'oqs' + } +} |