summaryrefslogtreecommitdiff
path: root/Modules/_sha3/kcp/KeccakP-1600-opt64.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_sha3/kcp/KeccakP-1600-opt64.c')
-rw-r--r--Modules/_sha3/kcp/KeccakP-1600-opt64.c474
1 files changed, 474 insertions, 0 deletions
diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64.c b/Modules/_sha3/kcp/KeccakP-1600-opt64.c
new file mode 100644
index 0000000000..c90010dd92
--- /dev/null
+++ b/Modules/_sha3/kcp/KeccakP-1600-opt64.c
@@ -0,0 +1,474 @@
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaƫl Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include <stdlib.h>
+/* #include "brg_endian.h" */
+#include "KeccakP-1600-opt64-config.h"
+
+#if NOT_PYTHON
+typedef unsigned char UINT8;
+/* typedef unsigned long long int UINT64; */
+#endif
+
+#if defined(KeccakP1600_useLaneComplementing)
+#define UseBebigokimisa
+#endif
+
+#if defined(_MSC_VER)
+#define ROL64(a, offset) _rotl64(a, offset)
+#elif defined(KeccakP1600_useSHLD)
+ #define ROL64(x,N) ({ \
+ register UINT64 __out; \
+ register UINT64 __in = x; \
+ __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
+ __out; \
+ })
+#else
+#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset)))
+#endif
+
+#include "KeccakP-1600-64.macros"
+#ifdef KeccakP1600_fullUnrolling
+#define FullUnrolling
+#else
+#define Unrolling KeccakP1600_unrolling
+#endif
+#include "KeccakP-1600-unrolling.macros"
+#include "SnP-Relaned.h"
+
+static const UINT64 KeccakF1600RoundConstants[24] = {
+ 0x0000000000000001ULL,
+ 0x0000000000008082ULL,
+ 0x800000000000808aULL,
+ 0x8000000080008000ULL,
+ 0x000000000000808bULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008009ULL,
+ 0x000000000000008aULL,
+ 0x0000000000000088ULL,
+ 0x0000000080008009ULL,
+ 0x000000008000000aULL,
+ 0x000000008000808bULL,
+ 0x800000000000008bULL,
+ 0x8000000000008089ULL,
+ 0x8000000000008003ULL,
+ 0x8000000000008002ULL,
+ 0x8000000000000080ULL,
+ 0x000000000000800aULL,
+ 0x800000008000000aULL,
+ 0x8000000080008081ULL,
+ 0x8000000000008080ULL,
+ 0x0000000080000001ULL,
+ 0x8000000080008008ULL };
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_Initialize(void *state)
+{
+ memset(state, 0, 200);
+#ifdef KeccakP1600_useLaneComplementing
+ ((UINT64*)state)[ 1] = ~(UINT64)0;
+ ((UINT64*)state)[ 2] = ~(UINT64)0;
+ ((UINT64*)state)[ 8] = ~(UINT64)0;
+ ((UINT64*)state)[12] = ~(UINT64)0;
+ ((UINT64*)state)[17] = ~(UINT64)0;
+ ((UINT64*)state)[20] = ~(UINT64)0;
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ UINT64 lane;
+ if (length == 0)
+ return;
+ if (length == 1)
+ lane = data[0];
+ else {
+ lane = 0;
+ memcpy(&lane, data, length);
+ }
+ lane <<= offset*8;
+#else
+ UINT64 lane = 0;
+ unsigned int i;
+ for(i=0; i<length; i++)
+ lane |= ((UINT64)data[i]) << ((i+offset)*8);
+#endif
+ ((UINT64*)state)[lanePosition] ^= lane;
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ unsigned int i = 0;
+#ifdef NO_MISALIGNED_ACCESSES
+ /* If either pointer is misaligned, fall back to byte-wise xor. */
+
+ if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
+ for (i = 0; i < laneCount * 8; i++) {
+ ((unsigned char*)state)[i] ^= data[i];
+ }
+ }
+ else
+#endif
+ {
+ /* Otherwise... */
+
+ for( ; (i+8)<=laneCount; i+=8) {
+ ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
+ ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
+ ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
+ ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
+ ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4];
+ ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5];
+ ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6];
+ ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7];
+ }
+ for( ; (i+4)<=laneCount; i+=4) {
+ ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
+ ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
+ ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
+ ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
+ }
+ for( ; (i+2)<=laneCount; i+=2) {
+ ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
+ ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
+ }
+ if (i<laneCount) {
+ ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
+ }
+ }
+#else
+ unsigned int i;
+ UINT8 *curData = data;
+ for(i=0; i<laneCount; i++, curData+=8) {
+ UINT64 lane = (UINT64)curData[0]
+ | ((UINT64)curData[1] << 8)
+ | ((UINT64)curData[2] << 16)
+ | ((UINT64)curData[3] << 24)
+ | ((UINT64)curData[4] <<32)
+ | ((UINT64)curData[5] << 40)
+ | ((UINT64)curData[6] << 48)
+ | ((UINT64)curData[7] << 56);
+ ((UINT64*)state)[i] ^= lane;
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
+void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
+{
+ UINT64 lane = byte;
+ lane <<= (offset%8)*8;
+ ((UINT64*)state)[offset/8] ^= lane;
+}
+#endif
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+ SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#ifdef KeccakP1600_useLaneComplementing
+ if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) {
+ unsigned int i;
+ for(i=0; i<length; i++)
+ ((unsigned char*)state)[lanePosition*8+offset+i] = ~data[i];
+ }
+ else
+#endif
+ {
+ memcpy((unsigned char*)state+lanePosition*8+offset, data, length);
+ }
+#else
+#error "Not yet implemented"
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#ifdef KeccakP1600_useLaneComplementing
+ unsigned int lanePosition;
+
+ for(lanePosition=0; lanePosition<laneCount; lanePosition++)
+ if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
+ ((UINT64*)state)[lanePosition] = ~((const UINT64*)data)[lanePosition];
+ else
+ ((UINT64*)state)[lanePosition] = ((const UINT64*)data)[lanePosition];
+#else
+ memcpy(state, data, laneCount*8);
+#endif
+#else
+#error "Not yet implemented"
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+ SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#ifdef KeccakP1600_useLaneComplementing
+ unsigned int lanePosition;
+
+ for(lanePosition=0; lanePosition<byteCount/8; lanePosition++)
+ if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
+ ((UINT64*)state)[lanePosition] = ~0;
+ else
+ ((UINT64*)state)[lanePosition] = 0;
+ if (byteCount%8 != 0) {
+ lanePosition = byteCount/8;
+ if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
+ memset((unsigned char*)state+lanePosition*8, 0xFF, byteCount%8);
+ else
+ memset((unsigned char*)state+lanePosition*8, 0, byteCount%8);
+ }
+#else
+ memset(state, 0, byteCount);
+#endif
+#else
+#error "Not yet implemented"
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_Permute_24rounds(void *state)
+{
+ declareABCDE
+ #ifndef KeccakP1600_fullUnrolling
+ unsigned int i;
+ #endif
+ UINT64 *stateAsLanes = (UINT64*)state;
+
+ copyFromState(A, stateAsLanes)
+ rounds24
+ copyToState(stateAsLanes, A)
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_Permute_12rounds(void *state)
+{
+ declareABCDE
+ #ifndef KeccakP1600_fullUnrolling
+ unsigned int i;
+ #endif
+ UINT64 *stateAsLanes = (UINT64*)state;
+
+ copyFromState(A, stateAsLanes)
+ rounds12
+ copyToState(stateAsLanes, A)
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
+{
+ UINT64 lane = ((UINT64*)state)[lanePosition];
+#ifdef KeccakP1600_useLaneComplementing
+ if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
+ lane = ~lane;
+#endif
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ {
+ UINT64 lane1[1];
+ lane1[0] = lane;
+ memcpy(data, (UINT8*)lane1+offset, length);
+ }
+#else
+ unsigned int i;
+ lane >>= offset*8;
+ for(i=0; i<length; i++) {
+ data[i] = lane & 0xFF;
+ lane >>= 8;
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
+void fromWordToBytes(UINT8 *bytes, const UINT64 word)
+{
+ unsigned int i;
+
+ for(i=0; i<(64/8); i++)
+ bytes[i] = (word >> (8*i)) & 0xFF;
+}
+#endif
+
+void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ memcpy(data, state, laneCount*8);
+#else
+ unsigned int i;
+
+ for(i=0; i<laneCount; i++)
+ fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
+#endif
+#ifdef KeccakP1600_useLaneComplementing
+ if (laneCount > 1) {
+ ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1];
+ if (laneCount > 2) {
+ ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2];
+ if (laneCount > 8) {
+ ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8];
+ if (laneCount > 12) {
+ ((UINT64*)data)[12] = ~((UINT64*)data)[12];
+ if (laneCount > 17) {
+ ((UINT64*)data)[17] = ~((UINT64*)data)[17];
+ if (laneCount > 20) {
+ ((UINT64*)data)[20] = ~((UINT64*)data)[20];
+ }
+ }
+ }
+ }
+ }
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
+{
+ SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+{
+ UINT64 lane = ((UINT64*)state)[lanePosition];
+#ifdef KeccakP1600_useLaneComplementing
+ if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
+ lane = ~lane;
+#endif
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ {
+ unsigned int i;
+ UINT64 lane1[1];
+ lane1[0] = lane;
+ for(i=0; i<length; i++)
+ output[i] = input[i] ^ ((UINT8*)lane1)[offset+i];
+ }
+#else
+ unsigned int i;
+ lane >>= offset*8;
+ for(i=0; i<length; i++) {
+ output[i] = input[i] ^ (lane & 0xFF);
+ lane >>= 8;
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
+{
+ unsigned int i;
+#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
+ unsigned char temp[8];
+ unsigned int j;
+#endif
+
+ for(i=0; i<laneCount; i++) {
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i];
+#else
+ fromWordToBytes(temp, ((const UINT64*)state)[i]);
+ for(j=0; j<8; j++)
+ output[i*8+j] = input[i*8+j] ^ temp[j];
+#endif
+ }
+#ifdef KeccakP1600_useLaneComplementing
+ if (laneCount > 1) {
+ ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1];
+ if (laneCount > 2) {
+ ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2];
+ if (laneCount > 8) {
+ ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8];
+ if (laneCount > 12) {
+ ((UINT64*)output)[12] = ~((UINT64*)output)[12];
+ if (laneCount > 17) {
+ ((UINT64*)output)[17] = ~((UINT64*)output)[17];
+ if (laneCount > 20) {
+ ((UINT64*)output)[20] = ~((UINT64*)output)[20];
+ }
+ }
+ }
+ }
+ }
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+{
+ SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
+{
+ size_t originalDataByteLen = dataByteLen;
+ declareABCDE
+ #ifndef KeccakP1600_fullUnrolling
+ unsigned int i;
+ #endif
+ UINT64 *stateAsLanes = (UINT64*)state;
+ UINT64 *inDataAsLanes = (UINT64*)data;
+
+ copyFromState(A, stateAsLanes)
+ while(dataByteLen >= laneCount*8) {
+ addInput(A, inDataAsLanes, laneCount)
+ rounds24
+ inDataAsLanes += laneCount;
+ dataByteLen -= laneCount*8;
+ }
+ copyToState(stateAsLanes, A)
+ return originalDataByteLen - dataByteLen;
+}