author    weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>    2009-03-02 02:39:17 +0000
committer weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>    2009-03-02 02:39:17 +0000
commit    caf9e032e6b4ccb114a74a3936c916bcfaba262d (patch)
tree      0fecaa7a6728d07549a41864ea2cedfb245f0bd3 /rijndael.cpp
parent    4e4793cc591e26c788b53c487bee7cab2d377f5e (diff)
download  cryptopp-caf9e032e6b4ccb114a74a3936c916bcfaba262d.tar.gz
changes for 5.6:
- added AuthenticatedSymmetricCipher interface class and Filter wrappers
- added CCM, GCM (with SSE2 assembly), CMAC, and SEED
- improved AES speed on x86 and x64
- removed WORD64_AVAILABLE; compiler 64-bit int support is now required

git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@433 57ff6487-cd31-0410-9ec3-f628ee90f5f0
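The AuthenticatedSymmetricCipher interface and filter wrappers listed above are driven through the usual pipelining API; a minimal sketch against the current Crypto++ GCM/filter interface (key, IV and message handling here are purely illustrative):

    #include "aes.h"
    #include "gcm.h"
    #include "filters.h"
    #include "osrng.h"
    #include <string>

    int main()
    {
        using namespace CryptoPP;
        AutoSeededRandomPool prng;
        SecByteBlock key(AES::DEFAULT_KEYLENGTH);
        byte iv[12];
        prng.GenerateBlock(key, key.size());
        prng.GenerateBlock(iv, sizeof(iv));

        GCM<AES>::Encryption enc;                          // an AuthenticatedSymmetricCipher
        enc.SetKeyWithIV(key, key.size(), iv, sizeof(iv));

        std::string plain = "attack at dawn", cipher;
        StringSource ss(plain, true,
            new AuthenticatedEncryptionFilter(enc,         // Filter wrapper; appends the MAC tag
                new StringSink(cipher)));
        return 0;
    }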
Diffstat (limited to 'rijndael.cpp')
-rw-r--r--  rijndael.cpp  967
1 file changed, 619 insertions, 348 deletions
diff --git a/rijndael.cpp b/rijndael.cpp
index b89e3b3..05c403a 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -5,6 +5,16 @@
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
/*
+The assembly code was rewritten in Feb 2009 by Wei Dai to do counter mode
+caching, which was invented by Hongjun Wu and popularized by Daniel J. Bernstein
+and Peter Schwabe in their paper "New AES software speed records". The round
+function was also modified to include a trick similar to one in Brian Gladman's
+x86 assembly code, doing an 8-bit register move to minimize the number of
+register spills. The code also switched to compressed tables and now copies the
+round keys to the stack.
+*/
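The effect of counter mode caching can be pictured outside of AES: successive counter blocks usually differ only in the low byte, so everything derived from the other fifteen bytes is computed once and reused, and only the low byte's contribution is redone per block. A toy sketch of that pattern (MixHigh and MixLow are stand-ins invented for this sketch, not the AES round function):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    typedef std::array<uint8_t,16> Block;

    static uint32_t MixHigh(const Block &ctr)   // stand-in for the cacheable work on bytes 0..14
    {
        uint32_t x = 0;
        for (int i = 0; i < 15; i++)
            x = x*131 + ctr[i];
        return x;
    }

    static uint32_t MixLow(uint8_t last)        // stand-in for the per-block work on byte 15
    {
        return last * 0x9E3779B9u;
    }

    int main()
    {
        Block ctr = {};                         // 16-byte counter block
        uint32_t cached = MixHigh(ctr);         // recomputed only when a carry reaches byte 14
        for (int i = 0; i < 8; i++)
        {
            uint32_t out = cached ^ MixLow(ctr[15]);
            std::printf("block %d -> %08x\n", i, (unsigned)out);
            if (++ctr[15] == 0)                 // carry into the high bytes invalidates the cache
                cached = MixHigh(ctr);
        }
        return 0;
    }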
+
+/*
Defense against timing attacks was added in July 2006 by Wei Dai.
The code now uses smaller tables in the first and last rounds,
@@ -58,6 +68,72 @@ being unloaded from L1 cache, until that round is finished.
NAMESPACE_BEGIN(CryptoPP)
+#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
+namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
+using namespace rdtable;
+#else
+static word64 Te[256];
+#endif
+static word32 Td[256*4];
+#else
+static word32 Te[256*4], Td[256*4];
+#endif
+static bool s_TeFilled = false, s_TdFilled = false;
+
+#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
+#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
+#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
+
+#define f3(x) (f2(x) ^ x)
+#define f9(x) (f8(x) ^ x)
+#define fb(x) (f8(x) ^ f2(x) ^ x)
+#define fd(x) (f8(x) ^ f4(x) ^ x)
+#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
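These macros multiply a byte by the constants 2, 3, 4, 8, 9, 0x0b, 0x0d and 0x0e in GF(2^8) modulo the AES polynomial 0x11b; the tables built just below are assembled from them. A standalone sanity check of the first two against a bit-by-bit reference multiplier (gmul is written only for this sketch):

    #include <cstdio>

    #define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
    #define f3(x) (f2(x) ^ x)

    // Reference GF(2^8) multiply modulo x^8 + x^4 + x^3 + x + 1 (0x11b).
    static unsigned gmul(unsigned a, unsigned b)
    {
        unsigned p = 0;
        for (int i = 0; i < 8; i++)
        {
            if (b & 1) p ^= a;
            unsigned hi = a & 0x80;
            a = (a << 1) & 0xff;
            if (hi) a ^= 0x1b;
            b >>= 1;
        }
        return p;
    }

    int main()
    {
        for (unsigned x = 0; x < 256; x++)
        {
            if (f2(x) != gmul(x, 2) || f3(x) != gmul(x, 3))
            {
                std::printf("mismatch at %u\n", x);
                return 1;
            }
        }
        std::printf("f2/f3 match the reference multiply for all byte values\n");
        return 0;
    }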
+
+void Rijndael::Base::FillEncTable()
+{
+ for (int i=0; i<256; i++)
+ {
+ byte x = Se[i];
+#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+ word32 y = word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
+ Te[i] = word64(y | f3(x))<<32 | y;
+#else
+ word32 y = f3(x) | word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
+ for (int j=0; j<4; j++)
+ {
+ Te[i+j*256] = y;
+ y = rotrFixed(y, 8);
+ }
+#endif
+ }
+#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
+ Te[256] = Te[257] = 0;
+#endif
+ s_TeFilled = true;
+}
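With unaligned loads allowed, each 8-byte entry packs the four rotations of the classic T-table column at byte offsets 1 through 4, which is why a single 2 KB word64 table can replace four 1 KB word32 tables (the MAP0TO4 and TL macros later in the file read it at exactly those offsets). A small sketch of that layout, using Se[0] = 0x63, f2(0x63) = 0xc6 and f3(0x63) = 0xa5:

    #include <cstdio>
    #include <cstring>
    #include <stdint.h>

    int main()
    {
        unsigned s = 0x63, s2 = 0xc6, s3 = 0xa5;          // Se[0], f2(Se[0]), f3(Se[0])
        uint32_t y = (uint32_t)s<<8 | (uint32_t)s<<16 | (uint32_t)s2<<24;
        uint64_t entry = (uint64_t)(y | s3)<<32 | y;      // same packing as FillEncTable above

        for (int off = 1; off <= 4; off++)                // the four byte offsets used by TL/MAP0TO4
        {
            uint32_t t;
            std::memcpy(&t, (const unsigned char *)&entry + off, 4);
            std::printf("offset %d: %08x\n", off, (unsigned)t);
        }
        return 0;
    }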
+
+void Rijndael::Base::FillDecTable()
+{
+ for (int i=0; i<256; i++)
+ {
+ byte x = Sd[i];
+#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS_
+ word32 y = word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;
+ Td[i] = word64(y | fb(x))<<32 | y | x;
+#else
+		word32 y = fb(x) | word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;
+ for (int j=0; j<4; j++)
+ {
+ Td[i+j*256] = y;
+ y = rotrFixed(y, 8);
+ }
+#endif
+ }
+ s_TdFilled = true;
+}
+
void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
{
AssertValidKeyLength(keylen);
@@ -106,8 +182,16 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
rk += keylen/4;
}
- if (!IsForwardTransformation())
+ if (IsForwardTransformation())
{
+ if (!s_TeFilled)
+ FillEncTable();
+ }
+ else
+ {
+ if (!s_TdFilled)
+ FillDecTable();
+
unsigned int i, j;
rk = m_key;
@@ -148,349 +232,530 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
}
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
-extern "C" {
-void Rijndael_Enc_ProcessAndXorBlock(const word32 *table, word32 cacheLineSize, const word32 *k, const word32 *kLoopEnd, const byte *inBlock, const byte *xorBlock, byte *outBlock);
-}
-#endif
-
#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
-void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
+#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
+
+#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
+
+CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k)
{
-#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
+#if CRYPTOPP_BOOL_X86
+
+#define L_REG esp
+#define L_INDEX(i) (L_REG+512+i)
+#define L_INXORBLOCKS L_INBLOCKS+4
+#define L_OUTXORBLOCKS L_INBLOCKS+8
+#define L_OUTBLOCKS L_INBLOCKS+12
+#define L_INCREMENTS L_INDEX(16*15)
+#define L_SP L_INDEX(16*16)
+#define L_LENGTH L_INDEX(16*16+4)
+#define L_KEYS_BEGIN L_INDEX(16*16+8)
+
+#define MOVD movd
+#define MM(i) mm##i
+
+#define MXOR(a,b,c) \
+ AS2( movzx ebp, b)\
+ AS2( movd mm7, DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+ AS2( pxor MM(a), mm7)\
+
+#define MMOV(a,b,c) \
+ AS2( movzx ebp, b)\
+ AS2( movd MM(a), DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+
+#else
+
+#define L_REG r8
+#define L_INDEX(i) (r8+i)
+#define L_INXORBLOCKS L_INBLOCKS+8
+#define L_OUTXORBLOCKS L_INBLOCKS+16
+#define L_OUTBLOCKS L_INBLOCKS+24
+#define L_INCREMENTS L_INDEX(16*16)
+#define L_BP L_INDEX(16*18)
+#define L_LENGTH L_INDEX(16*18+8)
+#define L_KEYS_BEGIN L_INDEX(16*19)
+
+#define MOVD mov
+#define MM(i) r1##i##d
+
+#define MXOR(a,b,c) \
+ AS2( movzx ebp, b)\
+ AS2( xor MM(a), DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+
+#define MMOV(a,b,c) \
+ AS2( movzx ebp, b)\
+ AS2( mov MM(a), DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
- Rijndael_Enc_ProcessAndXorBlock(Te, g_cacheLineSize, m_key, m_key + m_rounds*4, inBlock, xorBlock, outBlock);
- return;
#endif
-#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
- #ifdef CRYPTOPP_GENERATE_X64_MASM
+#define L_SUBKEYS L_INDEX(0)
+#define L_SAVED_X L_SUBKEYS
+#define L_KEY12 L_INDEX(16*12)
+#define L_LASTROUND L_INDEX(16*13)
+#define L_INBLOCKS L_INDEX(16*14)
+#define MAP0TO4(i) (ASM_MOD(i+3,4)+1)
+
+#define XOR(a,b,c) \
+ AS2( movzx ebp, b)\
+ AS2( xor a, DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+
+#define MOV(a,b,c) \
+ AS2( movzx ebp, b)\
+ AS2( mov a, DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+
+#ifdef CRYPTOPP_GENERATE_X64_MASM
ALIGN 8
- Rijndael_Enc_ProcessAndXorBlock PROC FRAME
- rex_push_reg rbx
- push_reg rsi
+ Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
+ rex_push_reg rsi
push_reg rdi
+ push_reg rbx
+ push_reg rbp
push_reg r12
- push_reg r13
- push_reg r14
- push_reg r15
.endprolog
- mov AS_REG_7, rcx
- mov rdi, [rsp + 5*8 + 7*8] ; inBlock
- #else
- if (HasMMX())
- {
- const word32 *k = m_key;
- const word32 *kLoopEnd = k + m_rounds*4;
- #endif
-
- #if CRYPTOPP_BOOL_X64
- #define K_REG r8
- #define K_END_REG r9
- #define SAVE_K
- #define RESTORE_K
- #define RESTORE_K_END
- #define SAVE_0(x) AS2(mov r13d, x)
- #define SAVE_1(x) AS2(mov r14d, x)
- #define SAVE_2(x) AS2(mov r15d, x)
- #define RESTORE_0(x) AS2(mov x, r13d)
- #define RESTORE_1(x) AS2(mov x, r14d)
- #define RESTORE_2(x) AS2(mov x, r15d)
- #else
- #define K_REG esi
- #define K_END_REG edi
- #define SAVE_K AS2(movd mm4, esi)
- #define RESTORE_K AS2(movd esi, mm4)
- #define RESTORE_K_END AS2(movd edi, mm5)
- #define SAVE_0(x) AS2(movd mm0, x)
- #define SAVE_1(x) AS2(movd mm1, x)
- #define SAVE_2(x) AS2(movd mm2, x)
- #define RESTORE_0(x) AS2(movd x, mm0)
- #define RESTORE_1(x) AS2(movd x, mm1)
- #define RESTORE_2(x) AS2(movd x, mm2)
- #endif
-#ifdef __GNUC__
- word32 t0, t1, t2, t3;
- __asm__ __volatile__
- (
- ".intel_syntax noprefix;"
+ mov r8, rcx
+ mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA
+ mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]
+#elif defined(__GNUC__)
+ __asm__ __volatile__
+ (
+ ".intel_syntax noprefix;"
+ ASL(Rijndael_Enc_AdvancedProcessBlocks)
#if CRYPTOPP_BOOL_X64
- AS2( mov K_REG, rsi)
- AS2( mov K_END_REG, rcx)
- #else
- AS1( push ebx)
- AS1( push ebp)
- AS2( movd mm5, ecx)
+ AS2( mov r8, rcx)
+ AS2( mov [L_BP], rbp)
#endif
- AS2( mov AS_REG_7, WORD_REG(ax))
-#elif CRYPTOPP_BOOL_X86
- #if _MSC_VER < 1300
- const word32 *t = Te;
- AS2( mov eax, t)
- #endif
- AS2( mov edx, g_cacheLineSize)
- AS2( mov WORD_REG(di), inBlock)
- AS2( mov K_REG, k)
- AS2( movd mm5, kLoopEnd)
- #if _MSC_VER < 1300
- AS1( push ebx)
- AS1( push ebp)
- AS2( mov AS_REG_7, eax)
- #else
- AS1( push ebp)
- AS2( lea AS_REG_7, Te)
- #endif
-#endif
- AS2( mov eax, [K_REG+0*4]) // s0
- AS2( xor eax, [WORD_REG(di)+0*4])
- SAVE_0(eax)
- AS2( mov ebx, [K_REG+1*4])
- AS2( xor ebx, [WORD_REG(di)+1*4])
- SAVE_1(ebx)
- AS2( and ebx, eax)
- AS2( mov eax, [K_REG+2*4])
- AS2( xor eax, [WORD_REG(di)+2*4])
- SAVE_2(eax)
- AS2( and ebx, eax)
- AS2( mov ecx, [K_REG+3*4])
- AS2( xor ecx, [WORD_REG(di)+3*4])
- AS2( and ebx, ecx)
-
- // read Te0 into L1 cache. this code could be simplifed by using lfence, but that is an SSE2 instruction
- AS2( and ebx, 0)
- AS2( mov edi, ebx) // make index depend on previous loads to simulate lfence
- ASL(2)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( cmp edi, 1024)
- ASJ( jl, 2, b)
- AS2( and ebx, [AS_REG_7+1020])
-#if CRYPTOPP_BOOL_X64
- AS2( xor r13d, ebx)
- AS2( xor r14d, ebx)
- AS2( xor r15d, ebx)
#else
- AS2( movd mm6, ebx)
- AS2( pxor mm2, mm6)
- AS2( pxor mm1, mm6)
- AS2( pxor mm0, mm6)
+ AS1( push esi)
+ AS1( push edi)
+ AS2( lea esi, [Te])
+ AS2( mov edi, [g_cacheLineSize])
#endif
- AS2( xor ecx, ebx)
- AS2( mov edi, [K_REG+4*4]) // t0
- AS2( mov eax, [K_REG+5*4])
- AS2( mov ebx, [K_REG+6*4])
- AS2( mov edx, [K_REG+7*4])
- AS2( add K_REG, 8*4)
- SAVE_K
-
-#define QUARTER_ROUND(t, a, b, c, d) \
- AS2(movzx esi, t##l)\
- AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
- AS2(shr e##t##x, 16)\
- AS2(movzx esi, t##l)\
- AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])
-
-#define s0 xor edi
-#define s1 xor eax
-#define s2 xor ebx
-#define s3 xor ecx
-#define t0 xor edi
-#define t1 xor eax
-#define t2 xor ebx
-#define t3 xor edx
-
- QUARTER_ROUND(c, t0, t1, t2, t3)
- RESTORE_2(ecx)
- QUARTER_ROUND(c, t3, t0, t1, t2)
- RESTORE_1(ecx)
- QUARTER_ROUND(c, t2, t3, t0, t1)
- RESTORE_0(ecx)
- QUARTER_ROUND(c, t1, t2, t3, t0)
- SAVE_2(ebx)
- SAVE_1(eax)
- SAVE_0(edi)
-#undef QUARTER_ROUND
+#if CRYPTOPP_BOOL_X86
+ AS_PUSH_IF86( bx)
+ AS_PUSH_IF86( bp)
+ AS2( mov [ecx+16*12+16*4], esp)
+ AS2( lea esp, [ecx-512])
+#endif
- RESTORE_K
+ // copy subkeys to stack
+ AS2( mov WORD_REG(bp), [L_KEYS_BEGIN])
+ AS2( mov WORD_REG(ax), 16)
+ AS2( and WORD_REG(ax), WORD_REG(bp))
+ AS2( movdqa xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)]) // subkey 1 (non-counter) or 2 (counter)
+ AS2( movdqa [L_KEY12], xmm3)
+ AS2( lea WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16])
+ AS2( sub WORD_REG(ax), WORD_REG(bp))
+ ASL(0)
+ AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(bp)])
+ AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(bp)], xmm0)
+ AS2( add WORD_REG(bp), 16)
+ AS2( cmp WORD_REG(bp), 16*12)
+ ASJ( jl, 0, b)
+
+ // read subkeys 0, 1 and last
+ AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(bp)]) // last subkey
+ AS2( movdqa xmm1, [WORD_REG(dx)]) // subkey 0
+ AS2( MOVD MM(1), [WORD_REG(dx)+4*4]) // 0,1,2,3
+ AS2( mov ebx, [WORD_REG(dx)+5*4]) // 4,5,6,7
+ AS2( mov ecx, [WORD_REG(dx)+6*4]) // 8,9,10,11
+ AS2( mov edx, [WORD_REG(dx)+7*4]) // 12,13,14,15
+
+ // load table into cache
+ AS2( xor WORD_REG(ax), WORD_REG(ax))
+ ASL(9)
+ AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( add WORD_REG(ax), WORD_REG(di))
+ AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( add WORD_REG(ax), WORD_REG(di))
+ AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( add WORD_REG(ax), WORD_REG(di))
+ AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( add WORD_REG(ax), WORD_REG(di))
+ AS2( cmp WORD_REG(ax), 2048)
+ ASJ( jl, 9, b)
+ AS1( lfence)
+
+ AS2( test DWORD PTR [L_LENGTH], 1)
+ ASJ( jz, 8, f)
+
+ // counter mode one-time setup
+ AS2( mov WORD_REG(bp), [L_INBLOCKS])
+ AS2( movdqa xmm2, [WORD_REG(bp)]) // counter
+ AS2( pxor xmm2, xmm1)
+ AS2( psrldq xmm1, 14)
+ AS2( movd eax, xmm1)
+ AS2( mov al, BYTE PTR [WORD_REG(bp)+15])
+ AS2( MOVD MM(2), eax)
+#if CRYPTOPP_BOOL_X86
+ AS2( mov eax, 1)
+ AS2( movd mm3, eax)
+#endif
- ASL(0)
- AS2( mov edi, [K_REG+0*4])
- AS2( mov eax, [K_REG+1*4])
- AS2( mov ebx, [K_REG+2*4])
- AS2( mov ecx, [K_REG+3*4])
+ // partial first round, in: xmm2(15,14,13,12;11,10,9,8;7,6,5,4;3,2,1,0), out: mm1, ebx, ecx, edx
+ AS2( movd eax, xmm2)
+ AS2( psrldq xmm2, 4)
+ AS2( movd edi, xmm2)
+ AS2( psrldq xmm2, 4)
+ MXOR( 1, al, 0) // 0
+ XOR( edx, ah, 1) // 1
+ AS2( shr eax, 16)
+ XOR( ecx, al, 2) // 2
+ XOR( ebx, ah, 3) // 3
+ AS2( mov eax, edi)
+ AS2( movd edi, xmm2)
+ AS2( psrldq xmm2, 4)
+ XOR( ebx, al, 0) // 4
+ MXOR( 1, ah, 1) // 5
+ AS2( shr eax, 16)
+ XOR( edx, al, 2) // 6
+ XOR( ecx, ah, 3) // 7
+ AS2( mov eax, edi)
+ AS2( movd edi, xmm2)
+ XOR( ecx, al, 0) // 8
+ XOR( ebx, ah, 1) // 9
+ AS2( shr eax, 16)
+ MXOR( 1, al, 2) // 10
+ XOR( edx, ah, 3) // 11
+ AS2( mov eax, edi)
+ XOR( edx, al, 0) // 12
+ XOR( ecx, ah, 1) // 13
+ AS2( shr eax, 16)
+ XOR( ebx, al, 2) // 14
+ AS2( psrldq xmm2, 3)
+
+ // partial second round, in: ebx(4,5,6,7), ecx(8,9,10,11), edx(12,13,14,15), out: eax, ebx, edi, mm0
+ AS2( mov eax, [L_KEY12+0*4])
+ AS2( mov edi, [L_KEY12+2*4])
+ AS2( MOVD MM(0), [L_KEY12+3*4])
+ MXOR( 0, cl, 3) /* 11 */
+ XOR( edi, bl, 3) /* 7 */
+ MXOR( 0, bh, 2) /* 6 */
+ AS2( shr ebx, 16) /* 4,5 */
+ XOR( eax, bl, 1) /* 5 */
+ MOV( ebx, bh, 0) /* 4 */
+ AS2( xor ebx, [L_KEY12+1*4])
+ XOR( eax, ch, 2) /* 10 */
+ AS2( shr ecx, 16) /* 8,9 */
+ XOR( eax, dl, 3) /* 15 */
+ XOR( ebx, dh, 2) /* 14 */
+ AS2( shr edx, 16) /* 12,13 */
+ XOR( edi, ch, 0) /* 8 */
+ XOR( ebx, cl, 1) /* 9 */
+ XOR( edi, dl, 1) /* 13 */
+ MXOR( 0, dh, 0) /* 12 */
+
+ AS2( movd ecx, xmm2)
+ AS2( MOVD edx, MM(1))
+ AS2( MOVD [L_SAVED_X+3*4], MM(0))
+ AS2( mov [L_SAVED_X+0*4], eax)
+ AS2( mov [L_SAVED_X+1*4], ebx)
+ AS2( mov [L_SAVED_X+2*4], edi)
+ ASJ( jmp, 5, f)
+
+ ASL(3)
+ // non-counter mode per-block setup
+ AS2( MOVD MM(1), [L_KEY12+0*4]) // 0,1,2,3
+ AS2( mov ebx, [L_KEY12+1*4]) // 4,5,6,7
+ AS2( mov ecx, [L_KEY12+2*4]) // 8,9,10,11
+ AS2( mov edx, [L_KEY12+3*4]) // 12,13,14,15
+ ASL(8)
+ AS2( mov WORD_REG(ax), [L_INBLOCKS])
+ AS2( movdqu xmm2, [WORD_REG(ax)])
+ AS2( mov WORD_REG(bp), [L_INXORBLOCKS])
+ AS2( movdqu xmm5, [WORD_REG(bp)])
+ AS2( pxor xmm2, xmm1)
+ AS2( pxor xmm2, xmm5)
+
+ // first round, in: xmm2(15,14,13,12;11,10,9,8;7,6,5,4;3,2,1,0), out: eax, ebx, ecx, edx
+ AS2( movd eax, xmm2)
+ AS2( psrldq xmm2, 4)
+ AS2( movd edi, xmm2)
+ AS2( psrldq xmm2, 4)
+ MXOR( 1, al, 0) // 0
+ XOR( edx, ah, 1) // 1
+ AS2( shr eax, 16)
+ XOR( ecx, al, 2) // 2
+ XOR( ebx, ah, 3) // 3
+ AS2( mov eax, edi)
+ AS2( movd edi, xmm2)
+ AS2( psrldq xmm2, 4)
+ XOR( ebx, al, 0) // 4
+ MXOR( 1, ah, 1) // 5
+ AS2( shr eax, 16)
+ XOR( edx, al, 2) // 6
+ XOR( ecx, ah, 3) // 7
+ AS2( mov eax, edi)
+ AS2( movd edi, xmm2)
+ XOR( ecx, al, 0) // 8
+ XOR( ebx, ah, 1) // 9
+ AS2( shr eax, 16)
+ MXOR( 1, al, 2) // 10
+ XOR( edx, ah, 3) // 11
+ AS2( mov eax, edi)
+ XOR( edx, al, 0) // 12
+ XOR( ecx, ah, 1) // 13
+ AS2( shr eax, 16)
+ XOR( ebx, al, 2) // 14
+ MXOR( 1, ah, 3) // 15
+ AS2( MOVD eax, MM(1))
+
+ AS2( add L_REG, [L_KEYS_BEGIN])
+ AS2( add L_REG, 4*16)
+ ASJ( jmp, 2, f)
+
+ ASL(1)
+ // counter-mode per-block setup
+ AS2( MOVD ecx, MM(2))
+ AS2( MOVD edx, MM(1))
+ AS2( mov eax, [L_SAVED_X+0*4])
+ AS2( mov ebx, [L_SAVED_X+1*4])
+ AS2( xor cl, ch)
+ AS2( and WORD_REG(cx), 255)
+ ASL(5)
+#if CRYPTOPP_BOOL_X86
+ AS2( paddb MM(2), mm3)
+#else
+ AS2( add MM(2), 1)
+#endif
+ // remaining part of second round, in: edx(previous round),ebp(keyed counter byte) eax,ebx,[L_SAVED_X+2*4],[L_SAVED_X+3*4], out: eax,ebx,ecx,edx
+ AS2( xor edx, DWORD PTR [WORD_REG(si)+WORD_REG(cx)*8+3])
+ XOR( ebx, dl, 3)
+ MOV( ecx, dh, 2)
+ AS2( shr edx, 16)
+ AS2( xor ecx, [L_SAVED_X+2*4])
+ XOR( eax, dh, 0)
+ MOV( edx, dl, 1)
+ AS2( xor edx, [L_SAVED_X+3*4])
+
+ AS2( add L_REG, [L_KEYS_BEGIN])
+ AS2( add L_REG, 3*16)
+ ASJ( jmp, 4, f)
+
+// in: eax(0,1,2,3), ebx(4,5,6,7), ecx(8,9,10,11), edx(12,13,14,15)
+// out: eax, ebx, edi, mm0
+#define ROUND() \
+ MXOR( 0, cl, 3) /* 11 */\
+ AS2( mov cl, al) /* 8,9,10,3 */\
+ XOR( edi, ah, 2) /* 2 */\
+ AS2( shr eax, 16) /* 0,1 */\
+ XOR( edi, bl, 3) /* 7 */\
+ MXOR( 0, bh, 2) /* 6 */\
+ AS2( shr ebx, 16) /* 4,5 */\
+ MXOR( 0, al, 1) /* 1 */\
+ MOV( eax, ah, 0) /* 0 */\
+ XOR( eax, bl, 1) /* 5 */\
+ MOV( ebx, bh, 0) /* 4 */\
+ XOR( eax, ch, 2) /* 10 */\
+ XOR( ebx, cl, 3) /* 3 */\
+ AS2( shr ecx, 16) /* 8,9 */\
+ XOR( eax, dl, 3) /* 15 */\
+ XOR( ebx, dh, 2) /* 14 */\
+ AS2( shr edx, 16) /* 12,13 */\
+ XOR( edi, ch, 0) /* 8 */\
+ XOR( ebx, cl, 1) /* 9 */\
+ XOR( edi, dl, 1) /* 13 */\
+ MXOR( 0, dh, 0) /* 12 */\
+
+ ASL(2) // 2-round loop
+ AS2( MOVD MM(0), [L_SUBKEYS-4*16+3*4])
+ AS2( mov edi, [L_SUBKEYS-4*16+2*4])
+ ROUND()
+ AS2( mov ecx, edi)
+ AS2( xor eax, [L_SUBKEYS-4*16+0*4])
+ AS2( xor ebx, [L_SUBKEYS-4*16+1*4])
+ AS2( MOVD edx, MM(0))
+
+ ASL(4)
+ AS2( MOVD MM(0), [L_SUBKEYS-4*16+7*4])
+ AS2( mov edi, [L_SUBKEYS-4*16+6*4])
+ ROUND()
+ AS2( mov ecx, edi)
+ AS2( xor eax, [L_SUBKEYS-4*16+4*4])
+ AS2( xor ebx, [L_SUBKEYS-4*16+5*4])
+ AS2( MOVD edx, MM(0))
+
+ AS2( add L_REG, 32)
+ AS2( test L_REG, 255)
+ ASJ( jnz, 2, b)
+ AS2( sub L_REG, 16*16)
+
+#define LAST(a, b, c) \
+ AS2( movzx ebp, a )\
+ AS2( movzx edi, BYTE PTR [WORD_REG(si)+WORD_REG(bp)*8+1] )\
+ AS2( movzx ebp, b )\
+ AS2( xor edi, DWORD PTR [WORD_REG(si)+WORD_REG(bp)*8+0] )\
+ AS2( mov WORD PTR [L_LASTROUND+c], di )\
+
+ // last round
+ LAST(ch, dl, 2)
+ LAST(dh, al, 6)
+ AS2( shr edx, 16)
+ LAST(ah, bl, 10)
+ AS2( shr eax, 16)
+ LAST(bh, cl, 14)
+ AS2( shr ebx, 16)
+ LAST(dh, al, 12)
+ AS2( shr ecx, 16)
+ LAST(ah, bl, 0)
+ LAST(bh, cl, 4)
+ LAST(ch, dl, 8)
+
+ AS2( mov WORD_REG(ax), [L_OUTXORBLOCKS])
+ AS2( mov WORD_REG(bx), [L_OUTBLOCKS])
+
+ AS2( mov WORD_REG(cx), [L_LENGTH])
+ AS2( sub WORD_REG(cx), 16)
+
+ AS2( movdqu xmm2, [WORD_REG(ax)])
+ AS2( pxor xmm2, xmm4)
-#define QUARTER_ROUND(t, a, b, c, d) \
- AS2(movzx esi, t##l)\
- AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
- AS2(shr e##t##x, 16)\
- AS2(movzx esi, t##l)\
- AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])
-
- QUARTER_ROUND(d, s0, s1, s2, s3)
- RESTORE_2(edx)
- QUARTER_ROUND(d, s3, s0, s1, s2)
- RESTORE_1(edx)
- QUARTER_ROUND(d, s2, s3, s0, s1)
- RESTORE_0(edx)
- QUARTER_ROUND(d, s1, s2, s3, s0)
- RESTORE_K
- SAVE_2(ebx)
- SAVE_1(eax)
- SAVE_0(edi)
-
- AS2( mov edi, [K_REG+4*4])
- AS2( mov eax, [K_REG+5*4])
- AS2( mov ebx, [K_REG+6*4])
- AS2( mov edx, [K_REG+7*4])
-
- QUARTER_ROUND(c, t0, t1, t2, t3)
- RESTORE_2(ecx)
- QUARTER_ROUND(c, t3, t0, t1, t2)
- RESTORE_1(ecx)
- QUARTER_ROUND(c, t2, t3, t0, t1)
- RESTORE_0(ecx)
- QUARTER_ROUND(c, t1, t2, t3, t0)
- SAVE_2(ebx)
- SAVE_1(eax)
- SAVE_0(edi)
-
- RESTORE_K
- RESTORE_K_END
- AS2( add K_REG, 8*4)
- SAVE_K
- AS2( cmp K_END_REG, K_REG)
- ASJ( jne, 0, b)
+#if CRYPTOPP_BOOL_X86
+ AS2( movdqa xmm0, [L_INCREMENTS])
+ AS2( paddd xmm0, [L_INBLOCKS])
+ AS2( movdqa [L_INBLOCKS], xmm0)
+#else
+ AS2( movdqa xmm0, [L_INCREMENTS+16])
+ AS2( paddq xmm0, [L_INBLOCKS+16])
+ AS2( movdqa [L_INBLOCKS+16], xmm0)
+#endif
-#undef QUARTER_ROUND
-#undef s0
-#undef s1
-#undef s2
-#undef s3
-#undef t0
-#undef t1
-#undef t2
-#undef t3
-
- AS2( mov eax, [K_END_REG+0*4])
- AS2( mov ecx, [K_END_REG+1*4])
- AS2( mov esi, [K_END_REG+2*4])
- AS2( mov edi, [K_END_REG+3*4])
-
-#define QUARTER_ROUND(a, b, c, d) \
- AS2( movzx ebx, dl)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
- AS2( shl ebx, 3*8)\
- AS2( xor a, ebx)\
- AS2( movzx ebx, dh)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
- AS2( shl ebx, 2*8)\
- AS2( xor b, ebx)\
- AS2( shr edx, 16)\
- AS2( movzx ebx, dl)\
- AS2( shr edx, 8)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
- AS2( shl ebx, 1*8)\
- AS2( xor c, ebx)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(dx)])\
- AS2( xor d, ebx)
-
- QUARTER_ROUND(eax, ecx, esi, edi)
- RESTORE_2(edx)
- QUARTER_ROUND(edi, eax, ecx, esi)
- RESTORE_1(edx)
- QUARTER_ROUND(esi, edi, eax, ecx)
- RESTORE_0(edx)
- QUARTER_ROUND(ecx, esi, edi, eax)
+ AS2( pxor xmm2, [L_LASTROUND])
+ AS2( movdqu [WORD_REG(bx)], xmm2)
-#undef QUARTER_ROUND
+ ASJ( jle, 7, f)
+ AS2( mov [L_LENGTH], WORD_REG(cx))
+ AS2( test WORD_REG(cx), 1)
+ ASJ( jnz, 1, b)
+#if CRYPTOPP_BOOL_X64
+ AS2( movdqa xmm0, [L_INCREMENTS])
+ AS2( paddd xmm0, [L_INBLOCKS])
+ AS2( movdqa [L_INBLOCKS], xmm0)
+#endif
+ ASJ( jmp, 3, b)
+ ASL(7)
#if CRYPTOPP_BOOL_X86
- AS1(emms)
- AS1(pop ebp)
- #if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
- AS1(pop ebx)
- #endif
+ AS2( mov esp, [L_SP])
+ AS1( emms)
+#else
+ AS2( mov rbp, [L_BP])
+#endif
+ AS_POP_IF86( bp)
+ AS_POP_IF86( bx)
+#ifndef __GNUC__
+ AS_POP_IF86( di)
+ AS_POP_IF86( si)
+#endif
+#ifdef CRYPTOPP_GENERATE_X64_MASM
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ ret
+ Rijndael_Enc_AdvancedProcessBlocks ENDP
+#else
+ AS1( ret)
#endif
-
#ifdef __GNUC__
- ".att_syntax prefix;"
- : "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
- : "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
- : "memory", "cc"
- #if CRYPTOPP_BOOL_X64
- , "%ebx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
- #endif
- );
+ ".att_syntax prefix;"
+ );
+#endif
+}
- if (xorBlock)
- {
- t0 ^= ((const word32 *)xorBlock)[0];
- t1 ^= ((const word32 *)xorBlock)[1];
- t2 ^= ((const word32 *)xorBlock)[2];
- t3 ^= ((const word32 *)xorBlock)[3];
- }
- ((word32 *)outBlock)[0] = t0;
- ((word32 *)outBlock)[1] = t1;
- ((word32 *)outBlock)[2] = t2;
- ((word32 *)outBlock)[3] = t3;
-#else
- #if CRYPTOPP_BOOL_X64
- mov rbx, [rsp + 6*8 + 7*8] ; xorBlock
- #else
- AS2( mov ebx, xorBlock)
- #endif
- AS2( test WORD_REG(bx), WORD_REG(bx))
- ASJ( jz, 1, f)
- AS2( xor eax, [WORD_REG(bx)+0*4])
- AS2( xor ecx, [WORD_REG(bx)+1*4])
- AS2( xor esi, [WORD_REG(bx)+2*4])
- AS2( xor edi, [WORD_REG(bx)+3*4])
- ASL(1)
- #if CRYPTOPP_BOOL_X64
- mov rbx, [rsp + 7*8 + 7*8] ; outBlock
- #else
- AS2( mov ebx, outBlock)
- #endif
- AS2( mov [WORD_REG(bx)+0*4], eax)
- AS2( mov [WORD_REG(bx)+1*4], ecx)
- AS2( mov [WORD_REG(bx)+2*4], esi)
- AS2( mov [WORD_REG(bx)+3*4], edi)
#endif
-#if CRYPTOPP_GENERATE_X64_MASM
- pop r15
- pop r14
- pop r13
- pop r12
- pop rdi
- pop rsi
- pop rbx
- ret
- Rijndael_Enc_ProcessAndXorBlock ENDP
-#else
+#ifndef CRYPTOPP_GENERATE_X64_MASM
+
+#ifdef CRYPTOPP_X64_MASM_AVAILABLE
+extern "C" {
+void Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k);
+}
+#endif
+
+static inline bool AliasedWithTable(const byte *begin, const byte *end)
+{
+ size_t s0 = size_t(begin)%4096, s1 = size_t(end)%4096;
+ size_t t0 = size_t(Te)%4096, t1 = (size_t(Te)+sizeof(Te))%4096;
+ if (t1 > t0)
+ return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1);
+ else
+ return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0);
+}
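AliasedWithTable compares page offsets because buffers whose addresses coincide modulo the 4 KB page size typically map to the same cache sets, so stack spills into an aliased workspace could evict table lines and reopen the timing channel. In the common case where the table does not wrap around a page boundary the test is a plain interval-overlap check; a standalone illustration (OverlapsTableOffsets is a name made up for this sketch):

    #include <cstdio>
    #include <cstddef>

    // Non-wrapping case only: the table occupies page offsets [t0, t1).
    static bool OverlapsTableOffsets(size_t begin, size_t end, size_t t0, size_t t1)
    {
        size_t s0 = begin % 4096, s1 = end % 4096;
        return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1);
    }

    int main()
    {
        std::printf("%d\n", (int)OverlapsTableOffsets(0x1f80, 0x2000, 0xf00, 0x1000)); // 1: shares offsets 0xf80..0xfff
        std::printf("%d\n", (int)OverlapsTableOffsets(0x1800, 0x1880, 0xf00, 0x1000)); // 0: disjoint page offsets
        return 0;
    }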
+
+size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
+{
+ if (length < BLOCKSIZE)
+ return length;
+
+ if (HasSSE2())
+ {
+ struct Locals
+ {
+ word32 subkeys[4*12], workspace[8];
+ const byte *inBlocks, *inXorBlocks, *outXorBlocks;
+ byte *outBlocks;
+ size_t inIncrement, inXorIncrement, outXorIncrement, outIncrement;
+ size_t regSpill, lengthAndCounterFlag, keysBegin;
+ };
+
+ const byte* zeros = (byte *)(Te+256);
+ byte *space;
+
+ do {
+ space = (byte *)alloca(255+sizeof(Locals));
+ space += (256-(size_t)space%256)%256;
+ }
+ while (AliasedWithTable(space, space+sizeof(Locals)));
+
+ Locals &locals = *(Locals *)space;
+
+ locals.inBlocks = inBlocks;
+ locals.inXorBlocks = (flags & BT_XorInput) && xorBlocks ? xorBlocks : zeros;
+ locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks;
+ locals.outBlocks = outBlocks;
+
+ locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : BLOCKSIZE;
+ locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? BLOCKSIZE : 0;
+ locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : BLOCKSIZE;
+ locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : BLOCKSIZE;
+
+ locals.lengthAndCounterFlag = length - (length%16) - bool(flags & BT_InBlockIsCounter);
+ int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2);
+ locals.keysBegin = (12-keysToCopy)*16;
+
+ #ifdef __GNUC__
+ __asm__ __volatile__
+ (
+ AS1(call Rijndael_Enc_AdvancedProcessBlocks)
+ :
+ : "c" (&locals), "d" (m_key.begin()), "S" (Te), "D" (g_cacheLineSize)
+ : "memory", "cc", "%eax"
+ #if CRYPTOPP_BOOL_X64
+ , "%rbx", "%r8", "%r10", "%r11", "%r12"
+ #endif
+ );
+ #else
+ Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);
+ #endif
+ return length%16;
}
else
-#endif
-#endif // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
-#ifndef CRYPTOPP_GENERATE_X64_MASM
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
+}
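In practice this entry point is reached through the block-cipher mode objects rather than called directly; CTR mode in particular hands AdvancedProcessBlocks many blocks per call with the counter flag set, which is exactly what the counter-caching path above is built for. A minimal sketch using the public API (buffer size and key handling are illustrative only):

    #include "aes.h"
    #include "modes.h"
    #include "osrng.h"
    #include <vector>

    int main()
    {
        using namespace CryptoPP;
        AutoSeededRandomPool prng;
        SecByteBlock key(AES::DEFAULT_KEYLENGTH);
        byte ctr[AES::BLOCKSIZE];
        prng.GenerateBlock(key, key.size());
        prng.GenerateBlock(ctr, sizeof(ctr));

        CTR_Mode<AES>::Encryption enc(key, key.size(), ctr);
        std::vector<byte> buf(64*1024, 0);              // many 16-byte blocks in one call
        enc.ProcessData(&buf[0], &buf[0], buf.size());  // keystream XORed in place
        return 0;
    }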
+
+void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
+{
+#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
+ if (HasSSE2())
{
+ Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
+ return;
+ }
+#endif
+
word32 s0, s1, s2, s3, t0, t1, t2, t3;
const word32 *rk = m_key;
@@ -508,42 +773,56 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
const int cacheLineSize = GetCacheLineSize();
unsigned int i;
word32 u = 0;
+#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+ for (i=0; i<2048; i+=cacheLineSize)
+#else
for (i=0; i<1024; i+=cacheLineSize)
+#endif
u &= *(const word32 *)(((const byte *)Te)+i);
u &= Te[255];
s0 |= u; s1 |= u; s2 |= u; s3 |= u;
- // first round
-#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d) \
- a ^= rotrFixed(Te[byte(t)], 24); t >>= 8;\
- b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
- c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
- d ^= Te[t];
+ a ^= TL(3, byte(t)); t >>= 8;\
+ b ^= TL(2, byte(t)); t >>= 8;\
+ c ^= TL(1, byte(t)); t >>= 8;\
+ d ^= TL(0, t);
+
+#ifdef IS_LITTLE_ENDIAN
+ #ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+ #define TL(i, x) (*(word32 *)((byte *)Te + x*8 + (6-i)%4+1))
+ #else
+ #define TL(i, x) rotrFixed(Te[x], (3-i)*8)
+ #endif
+ #define QUARTER_ROUND1(t, a, b, c, d) QUARTER_ROUND(t, d, c, b, a)
#else
-#define QUARTER_ROUND(t, a, b, c, d) \
- d ^= Te[byte(t)]; t >>= 8;\
- c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
- b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
- a ^= rotrFixed(Te[t], 24);
+ #ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+ #define TL(i, x) (*(word32 *)((byte *)Te + x*8 + (4-i)%4))
+ #else
+ #define TL(i, x) rotrFixed(Te[x], i*8)
+ #endif
+ #define QUARTER_ROUND1 QUARTER_ROUND
#endif
- QUARTER_ROUND(s3, t0, t1, t2, t3)
- QUARTER_ROUND(s2, t3, t0, t1, t2)
- QUARTER_ROUND(s1, t2, t3, t0, t1)
- QUARTER_ROUND(s0, t1, t2, t3, t0)
-#undef QUARTER_ROUND
+ QUARTER_ROUND1(s3, t0, t1, t2, t3)
+ QUARTER_ROUND1(s2, t3, t0, t1, t2)
+ QUARTER_ROUND1(s1, t2, t3, t0, t1)
+ QUARTER_ROUND1(s0, t1, t2, t3, t0)
+
+#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) && defined(IS_LITTLE_ENDIAN)
+ #undef TL
+ #define TL(i, x) (*(word32 *)((byte *)Te + x*8 + (i+3)%4+1))
+#endif
+
+#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
+ #undef TL
+ #define TL(i, x) Te[i*256 + x]
+#endif
// Nr - 2 full rounds:
unsigned int r = m_rounds/2 - 1;
do
{
-#define QUARTER_ROUND(t, a, b, c, d) \
- a ^= Te[3*256+byte(t)]; t >>= 8;\
- b ^= Te[2*256+byte(t)]; t >>= 8;\
- c ^= Te[1*256+byte(t)]; t >>= 8;\
- d ^= Te[t];
-
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
QUARTER_ROUND(t3, s0, s1, s2, s3)
@@ -562,23 +841,16 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
rk += 8;
} while (--r);
- // timing attack countermeasure. see comments at top for more details
- u = 0;
- for (i=0; i<256; i+=cacheLineSize)
- u &= *(const word32 *)(Se+i);
- u &= *(const word32 *)(Se+252);
- t0 |= u; t1 |= u; t2 |= u; t3 |= u;
-
word32 tbw[4];
byte *const tempBlock = (byte *)tbw;
word32 *const obw = (word32 *)outBlock;
const word32 *const xbw = (const word32 *)xorBlock;
#define QUARTER_ROUND(t, a, b, c, d) \
- tempBlock[a] = Se[byte(t)]; t >>= 8;\
- tempBlock[b] = Se[byte(t)]; t >>= 8;\
- tempBlock[c] = Se[byte(t)]; t >>= 8;\
- tempBlock[d] = Se[t];
+ tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
+ tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
+ tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
+ tempBlock[d] = ((byte *)(Te+t))[1];
QUARTER_ROUND(t2, 15, 2, 5, 8)
QUARTER_ROUND(t1, 11, 14, 1, 4)
@@ -600,7 +872,6 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
obw[2] = tbw[2] ^ rk[2];
obw[3] = tbw[3] ^ rk[3];
}
- }
}
void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const