summaryrefslogtreecommitdiff
path: root/rijndael.cpp
diff options
context:
space:
mode:
authorweidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>2006-07-17 14:48:59 +0000
committerweidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>2006-07-17 14:48:59 +0000
commit69fb6c7fd8f5f591b38bb134567ea0786182b0ca (patch)
treec8076624e56c952ffd234c8327848a392eb6ecd1 /rijndael.cpp
parentdc4097807c67cdd91b7aeafc6f144e7f3640ae59 (diff)
downloadcryptopp-69fb6c7fd8f5f591b38bb134567ea0786182b0ca.tar.gz
AES timing attack countermeasures
git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@226 57ff6487-cd31-0410-9ec3-f628ee90f5f0
Diffstat (limited to 'rijndael.cpp')
-rw-r--r--rijndael.cpp515
1 files changed, 292 insertions, 223 deletions
diff --git a/rijndael.cpp b/rijndael.cpp
index 2843bec..7131e01 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -2,6 +2,23 @@
// and Wei Dai from Paulo Baretto's Rijndael implementation
// The original code and all modifications are in the public domain.
+/*
+Defense against timing attacks was added in July 2006 by Wei Dai.
+
+The code now uses smaller tables in the first and last rounds,
+and preloads them into L1 cache before usage (by loading at least
+one element in each cache line).
+
+We try to delay subsequent accesses to each table (used in the first
+and last rounds) until all of the table has been preloaded. Hopefully
+the compiler isn't smart enough to optimize that code away.
+
+After preloading the table, we also try not to access any memory location
+other than the table and the stack, in order to prevent table entries from
+being unloaded from L1 cache, until that round is finished.
+(Some popular CPUs have 2-way associative caches.)
+*/
+
// This is the original introductory comment:
/**
@@ -45,80 +62,45 @@ void Rijndael::Base::UncheckedSetKey(CipherDir dir, const byte *userKey, unsigne
m_key.New(4*(m_rounds+1));
word32 temp, *rk = m_key;
+ const word32 *rc = rcon;
unsigned int i=0;
GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);
- switch(keylen)
+ while (true)
{
- case 16:
- while (true)
- {
- temp = rk[3];
- rk[4] = rk[0] ^
- (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
- rcon[i];
- rk[5] = rk[1] ^ rk[4];
- rk[6] = rk[2] ^ rk[5];
- rk[7] = rk[3] ^ rk[6];
- if (++i == 10)
- break;
- rk += 4;
- }
- break;
+ temp = rk[keylen/4-1];
+ rk[keylen/4] = rk[0] ^
+ (word32(Se[GETBYTE(temp, 2)]) << 24) ^
+ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
+ (word32(Se[GETBYTE(temp, 0)]) << 8) ^
+ Se[GETBYTE(temp, 3)] ^
+ *(rc++);
+ rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
+ rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
+ rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];
+
+ if (rk + keylen/4 + 4 == m_key.end())
+ break;
- case 24:
- while (true) // for (;;) here triggers a bug in VC60 SP4 w/ Processor Pack
+ if (keylen == 24)
{
- temp = rk[ 5];
- rk[ 6] = rk[ 0] ^
- (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
- rcon[i];
- rk[ 7] = rk[ 1] ^ rk[ 6];
- rk[ 8] = rk[ 2] ^ rk[ 7];
- rk[ 9] = rk[ 3] ^ rk[ 8];
- if (++i == 8)
- break;
rk[10] = rk[ 4] ^ rk[ 9];
rk[11] = rk[ 5] ^ rk[10];
- rk += 6;
}
- break;
-
- case 32:
- while (true)
+ else if (keylen == 32)
{
- temp = rk[ 7];
- rk[ 8] = rk[ 0] ^
- (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
- rcon[i];
- rk[ 9] = rk[ 1] ^ rk[ 8];
- rk[10] = rk[ 2] ^ rk[ 9];
- rk[11] = rk[ 3] ^ rk[10];
- if (++i == 7)
- break;
- temp = rk[11];
- rk[12] = rk[ 4] ^
- (Te4[GETBYTE(temp, 3)] & 0xff000000) ^
- (Te4[GETBYTE(temp, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(temp, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(temp, 0)] & 0x000000ff);
- rk[13] = rk[ 5] ^ rk[12];
- rk[14] = rk[ 6] ^ rk[13];
- rk[15] = rk[ 7] ^ rk[14];
-
- rk += 8;
- }
- break;
+ temp = rk[11];
+ rk[12] = rk[ 4] ^
+ (word32(Se[GETBYTE(temp, 3)]) << 24) ^
+ (word32(Se[GETBYTE(temp, 2)]) << 16) ^
+ (word32(Se[GETBYTE(temp, 1)]) << 8) ^
+ Se[GETBYTE(temp, 0)];
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+ }
+ rk += keylen/4;
}
if (dir == DECRYPTION)
@@ -137,50 +119,112 @@ void Rijndael::Base::UncheckedSetKey(CipherDir dir, const byte *userKey, unsigne
for (i = 1; i < m_rounds; i++) {
rk += 4;
rk[0] =
- Td0[Te4[GETBYTE(rk[0], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[0], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[0], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[0], 0)] & 0xff];
+ Td0[Se[GETBYTE(rk[0], 3)]] ^
+ Td1[Se[GETBYTE(rk[0], 2)]] ^
+ Td2[Se[GETBYTE(rk[0], 1)]] ^
+ Td3[Se[GETBYTE(rk[0], 0)]];
rk[1] =
- Td0[Te4[GETBYTE(rk[1], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[1], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[1], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[1], 0)] & 0xff];
+ Td0[Se[GETBYTE(rk[1], 3)]] ^
+ Td1[Se[GETBYTE(rk[1], 2)]] ^
+ Td2[Se[GETBYTE(rk[1], 1)]] ^
+ Td3[Se[GETBYTE(rk[1], 0)]];
rk[2] =
- Td0[Te4[GETBYTE(rk[2], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[2], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[2], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[2], 0)] & 0xff];
+ Td0[Se[GETBYTE(rk[2], 3)]] ^
+ Td1[Se[GETBYTE(rk[2], 2)]] ^
+ Td2[Se[GETBYTE(rk[2], 1)]] ^
+ Td3[Se[GETBYTE(rk[2], 0)]];
rk[3] =
- Td0[Te4[GETBYTE(rk[3], 3)] & 0xff] ^
- Td1[Te4[GETBYTE(rk[3], 2)] & 0xff] ^
- Td2[Te4[GETBYTE(rk[3], 1)] & 0xff] ^
- Td3[Te4[GETBYTE(rk[3], 0)] & 0xff];
+ Td0[Se[GETBYTE(rk[3], 3)]] ^
+ Td1[Se[GETBYTE(rk[3], 2)]] ^
+ Td2[Se[GETBYTE(rk[3], 1)]] ^
+ Td3[Se[GETBYTE(rk[3], 0)]];
}
}
+
+ ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
+ ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
}
-typedef BlockGetAndPut<word32, BigEndian> Block;
+const static unsigned int s_lineSizeDiv4 = CRYPTOPP_L1_CACHE_LINE_SIZE/4;
+#ifdef IS_BIG_ENDIAN
+const static unsigned int s_i3=3, s_i2=2, s_i1=1, s_i0=0;
+#else
+const static unsigned int s_i3=0, s_i2=1, s_i1=2, s_i0=3;
+#endif
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
word32 s0, s1, s2, s3, t0, t1, t2, t3;
const word32 *rk = m_key;
- /*
- * map byte array block to cipher state
- * and add initial round key:
- */
- Block::Get(inBlock)(s0)(s1)(s2)(s3);
- s0 ^= rk[0];
- s1 ^= rk[1];
- s2 ^= rk[2];
- s3 ^= rk[3];
- /*
- * Nr - 1 full rounds:
- */
- unsigned int r = m_rounds >> 1;
- for (;;) {
+ s0 = ((const word32 *)inBlock)[0] ^ rk[0];
+ s1 = ((const word32 *)inBlock)[1] ^ rk[1];
+ s2 = ((const word32 *)inBlock)[2] ^ rk[2];
+ s3 = ((const word32 *)inBlock)[3] ^ rk[3];
+ t0 = rk[4];
+ t1 = rk[5];
+ t2 = rk[6];
+ t3 = rk[7];
+ rk += 8;
+
+ // timing attack countermeasure. see comments at top for more details
+ unsigned int i;
+ word32 u = 0;
+ for (i=0; i<sizeof(Te0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
+ u &= (Te0[i+0*s_lineSizeDiv4] & Te0[i+2*s_lineSizeDiv4]) & (Te0[i+1*s_lineSizeDiv4] & Te0[i+3*s_lineSizeDiv4]);
+ s0 |= u; s1 |= u; s2 |= u; s3 |= u;
+
+ // first round
+ t0 ^=
+ Te0[GETBYTE(s0, s_i3)] ^
+ rotrFixed(Te0[GETBYTE(s1, s_i2)], 8) ^
+ rotrFixed(Te0[GETBYTE(s2, s_i1)], 16) ^
+ rotrFixed(Te0[GETBYTE(s3, s_i0)], 24);
+ t1 ^=
+ Te0[GETBYTE(s1, s_i3)] ^
+ rotrFixed(Te0[GETBYTE(s2, s_i2)], 8) ^
+ rotrFixed(Te0[GETBYTE(s3, s_i1)], 16) ^
+ rotrFixed(Te0[GETBYTE(s0, s_i0)], 24);
+ t2 ^=
+ Te0[GETBYTE(s2, s_i3)] ^
+ rotrFixed(Te0[GETBYTE(s3, s_i2)], 8) ^
+ rotrFixed(Te0[GETBYTE(s0, s_i1)], 16) ^
+ rotrFixed(Te0[GETBYTE(s1, s_i0)], 24);
+ t3 ^=
+ Te0[GETBYTE(s3, s_i3)] ^
+ rotrFixed(Te0[GETBYTE(s0, s_i2)], 8) ^
+ rotrFixed(Te0[GETBYTE(s1, s_i1)], 16) ^
+ rotrFixed(Te0[GETBYTE(s2, s_i0)], 24);
+
+ // Nr - 2 full rounds:
+ unsigned int r = m_rounds/2 - 1;
+ do
+ {
+ s0 =
+ Te0[GETBYTE(t0, 3)] ^
+ Te1[GETBYTE(t1, 2)] ^
+ Te2[GETBYTE(t2, 1)] ^
+ Te3[GETBYTE(t3, 0)] ^
+ rk[0];
+ s1 =
+ Te0[GETBYTE(t1, 3)] ^
+ Te1[GETBYTE(t2, 2)] ^
+ Te2[GETBYTE(t3, 1)] ^
+ Te3[GETBYTE(t0, 0)] ^
+ rk[1];
+ s2 =
+ Te0[GETBYTE(t2, 3)] ^
+ Te1[GETBYTE(t3, 2)] ^
+ Te2[GETBYTE(t0, 1)] ^
+ Te3[GETBYTE(t1, 0)] ^
+ rk[2];
+ s3 =
+ Te0[GETBYTE(t3, 3)] ^
+ Te1[GETBYTE(t0, 2)] ^
+ Te2[GETBYTE(t1, 1)] ^
+ Te3[GETBYTE(t2, 0)] ^
+ rk[3];
+
t0 =
Te0[GETBYTE(s0, 3)] ^
Te1[GETBYTE(s1, 2)] ^
@@ -207,87 +251,126 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
rk[7];
rk += 8;
- if (--r == 0) {
- break;
- }
+ } while (--r);
+
+ // timing attack countermeasure. see comments at top for more details
+ u = 0;
+ for (i=0; i<sizeof(Se)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
+ u &= (((word32*)Se)[i+0*s_lineSizeDiv4] & ((word32*)Se)[i+2*s_lineSizeDiv4]) & (((word32*)Se)[i+1*s_lineSizeDiv4] & ((word32*)Se)[i+3*s_lineSizeDiv4]);
+ t0 |= u; t1 |= u; t2 |= u; t3 |= u;
+
+ word32 tbw[4];
+ byte *const tempBlock = (byte *)tbw;
+ word32 *const obw = (word32 *)outBlock;
+ const word32 *const xbw = (const word32 *)xorBlock;
+
+ // last round
+ tempBlock[0] = Se[GETBYTE(t0, 3)];
+ tempBlock[1] = Se[GETBYTE(t1, 2)];
+ tempBlock[2] = Se[GETBYTE(t2, 1)];
+ tempBlock[3] = Se[GETBYTE(t3, 0)];
+ tempBlock[4] = Se[GETBYTE(t1, 3)];
+ tempBlock[5] = Se[GETBYTE(t2, 2)];
+ tempBlock[6] = Se[GETBYTE(t3, 1)];
+ tempBlock[7] = Se[GETBYTE(t0, 0)];
+ tempBlock[8] = Se[GETBYTE(t2, 3)];
+ tempBlock[9] = Se[GETBYTE(t3, 2)];
+ tempBlock[10] = Se[GETBYTE(t0, 1)];
+ tempBlock[11] = Se[GETBYTE(t1, 0)];
+ tempBlock[12] = Se[GETBYTE(t3, 3)];
+ tempBlock[13] = Se[GETBYTE(t0, 2)];
+ tempBlock[14] = Se[GETBYTE(t1, 1)];
+ tempBlock[15] = Se[GETBYTE(t2, 0)];
+
+ if (xbw)
+ {
+ obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
+ obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
+ obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
+ obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
+ }
+ else
+ {
+ obw[0] = tbw[0] ^ rk[0];
+ obw[1] = tbw[1] ^ rk[1];
+ obw[2] = tbw[2] ^ rk[2];
+ obw[3] = tbw[3] ^ rk[3];
+ }
+}
+
+void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
+{
+ word32 s0, s1, s2, s3, t0, t1, t2, t3;
+ const word32 *rk = m_key;
+
+ s0 = ((const word32 *)inBlock)[0] ^ rk[0];
+ s1 = ((const word32 *)inBlock)[1] ^ rk[1];
+ s2 = ((const word32 *)inBlock)[2] ^ rk[2];
+ s3 = ((const word32 *)inBlock)[3] ^ rk[3];
+ t0 = rk[4];
+ t1 = rk[5];
+ t2 = rk[6];
+ t3 = rk[7];
+ rk += 8;
+ // timing attack countermeasure. see comments at top for more details
+ unsigned int i;
+ word32 u = 0;
+ for (i=0; i<sizeof(Td0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
+ u &= (Td0[i+0*s_lineSizeDiv4] & Td0[i+2*s_lineSizeDiv4]) & (Td0[i+1*s_lineSizeDiv4] & Td0[i+3*s_lineSizeDiv4]);
+ s0 |= u; s1 |= u; s2 |= u; s3 |= u;
+
+ // first round
+ t0 ^=
+ Td0[GETBYTE(s0, s_i3)] ^
+ rotrFixed(Td0[GETBYTE(s3, s_i2)], 8) ^
+ rotrFixed(Td0[GETBYTE(s2, s_i1)], 16) ^
+ rotrFixed(Td0[GETBYTE(s1, s_i0)], 24);
+ t1 ^=
+ Td0[GETBYTE(s1, s_i3)] ^
+ rotrFixed(Td0[GETBYTE(s0, s_i2)], 8) ^
+ rotrFixed(Td0[GETBYTE(s3, s_i1)], 16) ^
+ rotrFixed(Td0[GETBYTE(s2, s_i0)], 24);
+ t2 ^=
+ Td0[GETBYTE(s2, s_i3)] ^
+ rotrFixed(Td0[GETBYTE(s1, s_i2)], 8) ^
+ rotrFixed(Td0[GETBYTE(s0, s_i1)], 16) ^
+ rotrFixed(Td0[GETBYTE(s3, s_i0)], 24);
+ t3 ^=
+ Td0[GETBYTE(s3, s_i3)] ^
+ rotrFixed(Td0[GETBYTE(s2, s_i2)], 8) ^
+ rotrFixed(Td0[GETBYTE(s1, s_i1)], 16) ^
+ rotrFixed(Td0[GETBYTE(s0, s_i0)], 24);
+
+ // Nr - 2 full rounds:
+ unsigned int r = m_rounds/2 - 1;
+ do
+ {
s0 =
- Te0[GETBYTE(t0, 3)] ^
- Te1[GETBYTE(t1, 2)] ^
- Te2[GETBYTE(t2, 1)] ^
- Te3[GETBYTE(t3, 0)] ^
+ Td0[GETBYTE(t0, 3)] ^
+ Td1[GETBYTE(t3, 2)] ^
+ Td2[GETBYTE(t2, 1)] ^
+ Td3[GETBYTE(t1, 0)] ^
rk[0];
s1 =
- Te0[GETBYTE(t1, 3)] ^
- Te1[GETBYTE(t2, 2)] ^
- Te2[GETBYTE(t3, 1)] ^
- Te3[GETBYTE(t0, 0)] ^
+ Td0[GETBYTE(t1, 3)] ^
+ Td1[GETBYTE(t0, 2)] ^
+ Td2[GETBYTE(t3, 1)] ^
+ Td3[GETBYTE(t2, 0)] ^
rk[1];
s2 =
- Te0[GETBYTE(t2, 3)] ^
- Te1[GETBYTE(t3, 2)] ^
- Te2[GETBYTE(t0, 1)] ^
- Te3[GETBYTE(t1, 0)] ^
+ Td0[GETBYTE(t2, 3)] ^
+ Td1[GETBYTE(t1, 2)] ^
+ Td2[GETBYTE(t0, 1)] ^
+ Td3[GETBYTE(t3, 0)] ^
rk[2];
s3 =
- Te0[GETBYTE(t3, 3)] ^
- Te1[GETBYTE(t0, 2)] ^
- Te2[GETBYTE(t1, 1)] ^
- Te3[GETBYTE(t2, 0)] ^
+ Td0[GETBYTE(t3, 3)] ^
+ Td1[GETBYTE(t2, 2)] ^
+ Td2[GETBYTE(t1, 1)] ^
+ Td3[GETBYTE(t0, 0)] ^
rk[3];
- }
- /*
- * apply last round and
- * map cipher state to byte array block:
- */
-
- s0 =
- (Te4[GETBYTE(t0, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t1, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t2, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t3, 0)] & 0x000000ff) ^
- rk[0];
- s1 =
- (Te4[GETBYTE(t1, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t2, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t3, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t0, 0)] & 0x000000ff) ^
- rk[1];
- s2 =
- (Te4[GETBYTE(t2, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t3, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t0, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t1, 0)] & 0x000000ff) ^
- rk[2];
- s3 =
- (Te4[GETBYTE(t3, 3)] & 0xff000000) ^
- (Te4[GETBYTE(t0, 2)] & 0x00ff0000) ^
- (Te4[GETBYTE(t1, 1)] & 0x0000ff00) ^
- (Te4[GETBYTE(t2, 0)] & 0x000000ff) ^
- rk[3];
-
- Block::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3);
-}
-
-void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
-{
- word32 s0, s1, s2, s3, t0, t1, t2, t3;
- const word32 *rk = m_key;
- /*
- * map byte array block to cipher state
- * and add initial round key:
- */
- Block::Get(inBlock)(s0)(s1)(s2)(s3);
- s0 ^= rk[0];
- s1 ^= rk[1];
- s2 ^= rk[2];
- s3 ^= rk[3];
- /*
- * Nr - 1 full rounds:
- */
- unsigned int r = m_rounds >> 1;
- for (;;) {
t0 =
Td0[GETBYTE(s0, 3)] ^
Td1[GETBYTE(s3, 2)] ^
@@ -314,65 +397,51 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
rk[7];
rk += 8;
- if (--r == 0) {
- break;
- }
+ } while (--r);
- s0 =
- Td0[GETBYTE(t0, 3)] ^
- Td1[GETBYTE(t3, 2)] ^
- Td2[GETBYTE(t2, 1)] ^
- Td3[GETBYTE(t1, 0)] ^
- rk[0];
- s1 =
- Td0[GETBYTE(t1, 3)] ^
- Td1[GETBYTE(t0, 2)] ^
- Td2[GETBYTE(t3, 1)] ^
- Td3[GETBYTE(t2, 0)] ^
- rk[1];
- s2 =
- Td0[GETBYTE(t2, 3)] ^
- Td1[GETBYTE(t1, 2)] ^
- Td2[GETBYTE(t0, 1)] ^
- Td3[GETBYTE(t3, 0)] ^
- rk[2];
- s3 =
- Td0[GETBYTE(t3, 3)] ^
- Td1[GETBYTE(t2, 2)] ^
- Td2[GETBYTE(t1, 1)] ^
- Td3[GETBYTE(t0, 0)] ^
- rk[3];
- }
- /*
- * apply last round and
- * map cipher state to byte array block:
- */
- s0 =
- (Td4[GETBYTE(t0, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t3, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t2, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t1, 0)] & 0x000000ff) ^
- rk[0];
- s1 =
- (Td4[GETBYTE(t1, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t0, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t3, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t2, 0)] & 0x000000ff) ^
- rk[1];
- s2 =
- (Td4[GETBYTE(t2, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t1, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t0, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t3, 0)] & 0x000000ff) ^
- rk[2];
- s3 =
- (Td4[GETBYTE(t3, 3)] & 0xff000000) ^
- (Td4[GETBYTE(t2, 2)] & 0x00ff0000) ^
- (Td4[GETBYTE(t1, 1)] & 0x0000ff00) ^
- (Td4[GETBYTE(t0, 0)] & 0x000000ff) ^
- rk[3];
-
- Block::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3);
+ // timing attack countermeasure. see comments at top for more details
+ u = 0;
+ for (i=0; i<sizeof(Sd)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
+ u &= (((word32*)Sd)[i+0*s_lineSizeDiv4] & ((word32*)Sd)[i+2*s_lineSizeDiv4]) & (((word32*)Sd)[i+1*s_lineSizeDiv4] & ((word32*)Sd)[i+3*s_lineSizeDiv4]);
+ t0 |= u; t1 |= u; t2 |= u; t3 |= u;
+
+ word32 tbw[4];
+ byte *const tempBlock = (byte *)tbw;
+ word32 *const obw = (word32 *)outBlock;
+ const word32 *const xbw = (const word32 *)xorBlock;
+
+ // last round
+ tempBlock[0] = Sd[GETBYTE(t0, 3)];
+ tempBlock[1] = Sd[GETBYTE(t3, 2)];
+ tempBlock[2] = Sd[GETBYTE(t2, 1)];
+ tempBlock[3] = Sd[GETBYTE(t1, 0)];
+ tempBlock[4] = Sd[GETBYTE(t1, 3)];
+ tempBlock[5] = Sd[GETBYTE(t0, 2)];
+ tempBlock[6] = Sd[GETBYTE(t3, 1)];
+ tempBlock[7] = Sd[GETBYTE(t2, 0)];
+ tempBlock[8] = Sd[GETBYTE(t2, 3)];
+ tempBlock[9] = Sd[GETBYTE(t1, 2)];
+ tempBlock[10] = Sd[GETBYTE(t0, 1)];
+ tempBlock[11] = Sd[GETBYTE(t3, 0)];
+ tempBlock[12] = Sd[GETBYTE(t3, 3)];
+ tempBlock[13] = Sd[GETBYTE(t2, 2)];
+ tempBlock[14] = Sd[GETBYTE(t1, 1)];
+ tempBlock[15] = Sd[GETBYTE(t0, 0)];
+
+ if (xbw)
+ {
+ obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
+ obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
+ obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
+ obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
+ }
+ else
+ {
+ obw[0] = tbw[0] ^ rk[0];
+ obw[1] = tbw[1] ^ rk[1];
+ obw[2] = tbw[2] ^ rk[2];
+ obw[3] = tbw[3] ^ rk[3];
+ }
}
NAMESPACE_END