author    weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>  2009-03-12 11:24:12 +0000
committer weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>  2009-03-12 11:24:12 +0000
commit    a36c71ecb6840ff799546ccaf665e55f6a6ed5dc (patch)
tree      68edc0bccf003f5615716b3ae2d6b97067af39c4 /rijndael.cpp
parent    ce1fbfcba325116155a605b8519bc0b4e272348f (diff)
download  cryptopp-a36c71ecb6840ff799546ccaf665e55f6a6ed5dc.tar.gz
- add EAX mode, XSalsa20
- speed up GCM key setup
- wipe stack in AES assembly code
- speed up CFB mode

git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@444 57ff6487-cd31-0410-9ec3-f628ee90f5f0
Diffstat (limited to 'rijndael.cpp')
-rw-r--r--  rijndael.cpp  176
1 file changed, 99 insertions, 77 deletions
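
The "wipe stack in AES assembly code" item in the commit message corresponds to the block of xorps/movaps stores added near label 7 in the diff below, which zeroes the subkey copies that the routine keeps in its stack workspace before returning. As a rough, hypothetical sketch only (secure_wipe and ToySchedule are illustrative names, not Crypto++ code), the same idea in portable C++ looks roughly like this:

#include <cstddef>
#include <cstring>

// Not part of the patch: a portable illustration of the same idea.
// The volatile qualifier keeps the compiler from discarding the wipe
// as a dead store.
static void secure_wipe(void *p, std::size_t n)
{
    volatile unsigned char *vp = static_cast<volatile unsigned char *>(p);
    while (n--)
        *vp++ = 0;
}

struct ToySchedule { unsigned char subkeys[15 * 16]; };

void encrypt_blocks_example(const unsigned char *key, std::size_t keylen)
{
    ToySchedule sched;                           // stack copy, analogous to L_SUBKEYS
    std::memcpy(sched.subkeys, key,
                keylen < sizeof(sched.subkeys) ? keylen : sizeof(sched.subkeys));
    // ... run the block cipher rounds using sched.subkeys ...
    secure_wipe(&sched, sizeof(sched));          // erase key material before returning
}
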
diff --git a/rijndael.cpp b/rijndael.cpp
index 04e1f21..d6b267a 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -457,36 +457,42 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
#define MM(i) mm##i
#define MXOR(a,b,c) \
- AS2( movzx ebp, b)\
- AS2( movd mm7, DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+ AS2( movzx esi, b)\
+ AS2( movd mm7, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
AS2( pxor MM(a), mm7)\
#define MMOV(a,b,c) \
- AS2( movzx ebp, b)\
- AS2( movd MM(a), DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+ AS2( movzx esi, b)\
+ AS2( movd MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
#else
#define L_REG r8
-#define L_INDEX(i) (r8+i)
+#define L_INDEX(i) (L_REG+i)
#define L_INXORBLOCKS L_INBLOCKS+8
#define L_OUTXORBLOCKS L_INBLOCKS+16
#define L_OUTBLOCKS L_INBLOCKS+24
#define L_INCREMENTS L_INDEX(16*16)
-#define L_BP L_INDEX(16*18)
#define L_LENGTH L_INDEX(16*18+8)
#define L_KEYS_BEGIN L_INDEX(16*19)
#define MOVD mov
-#define MM(i) r1##i##d
+#define MM_0 r9d
+#define MM_1 r12d
+#ifdef __GNUC__
+#define MM_2 r11d
+#else
+#define MM_2 r10d
+#endif
+#define MM(i) MM_##i
#define MXOR(a,b,c) \
- AS2( movzx ebp, b)\
- AS2( xor MM(a), DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+ AS2( movzx esi, b)\
+ AS2( xor MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
#define MMOV(a,b,c) \
- AS2( movzx ebp, b)\
- AS2( mov MM(a), DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+ AS2( movzx esi, b)\
+ AS2( mov MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
#endif
@@ -498,12 +504,12 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
#define MAP0TO4(i) (ASM_MOD(i+3,4)+1)
#define XOR(a,b,c) \
- AS2( movzx ebp, b)\
- AS2( xor a, DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+ AS2( movzx esi, b)\
+ AS2( xor a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
#define MOV(a,b,c) \
- AS2( movzx ebp, b)\
- AS2( mov a, DWORD PTR [WORD_REG(si)+8*WORD_REG(bp)+MAP0TO4(c)])\
+ AS2( movzx esi, b)\
+ AS2( mov a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
#ifdef CRYPTOPP_GENERATE_X64_MASM
ALIGN 8
@@ -511,54 +517,54 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
rex_push_reg rsi
push_reg rdi
push_reg rbx
- push_reg rbp
push_reg r12
.endprolog
- mov r8, rcx
- mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA
+ mov L_REG, rcx
+ mov AS_REG_7, ?Te@rdtable@CryptoPP@@3PA_KA
mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]
#elif defined(__GNUC__)
__asm__ __volatile__
(
".intel_syntax noprefix;"
- ASL(Rijndael_Enc_AdvancedProcessBlocks)
#if CRYPTOPP_BOOL_X64
- AS2( mov r8, rcx)
- AS2( mov [L_BP], rbp)
+ AS2( mov L_REG, rcx)
#endif
+ AS_PUSH_IF86(bx)
+ AS_PUSH_IF86(bp)
+ AS2( mov AS_REG_7, WORD_REG(si))
#else
- AS1( push esi)
- AS1( push edi)
- AS2( lea esi, [Te])
- AS2( mov edi, [g_cacheLineSize])
-#endif
-
-#if CRYPTOPP_BOOL_X86
+ AS_PUSH_IF86(si)
+ AS_PUSH_IF86(di)
#if !defined(_MSC_VER) || (_MSC_VER < 1300)
AS_PUSH_IF86(bx)
#endif
AS_PUSH_IF86(bp)
- AS2( mov [ecx+16*12+16*4], esp)
+ AS2( lea AS_REG_7, [Te])
+ AS2( mov edi, [g_cacheLineSize])
+#endif
+
+#if CRYPTOPP_BOOL_X86
+ AS2( mov [ecx+16*12+16*4], esp) // save esp to L_SP
AS2( lea esp, [ecx-512])
#endif
// copy subkeys to stack
- AS2( mov WORD_REG(bp), [L_KEYS_BEGIN])
+ AS2( mov WORD_REG(si), [L_KEYS_BEGIN])
AS2( mov WORD_REG(ax), 16)
- AS2( and WORD_REG(ax), WORD_REG(bp))
+ AS2( and WORD_REG(ax), WORD_REG(si))
AS2( movdqa xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)]) // subkey 1 (non-counter) or 2 (counter)
AS2( movdqa [L_KEY12], xmm3)
AS2( lea WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16])
- AS2( sub WORD_REG(ax), WORD_REG(bp))
+ AS2( sub WORD_REG(ax), WORD_REG(si))
ASL(0)
- AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(bp)])
- AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(bp)], xmm0)
- AS2( add WORD_REG(bp), 16)
- AS2( cmp WORD_REG(bp), 16*12)
+ AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(si)])
+ AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(si)], xmm0)
+ AS2( add WORD_REG(si), 16)
+ AS2( cmp WORD_REG(si), 16*12)
ASJ( jl, 0, b)
// read subkeys 0, 1 and last
- AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(bp)]) // last subkey
+ AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(si)]) // last subkey
AS2( movdqa xmm1, [WORD_REG(dx)]) // subkey 0
AS2( MOVD MM(1), [WORD_REG(dx)+4*4]) // 0,1,2,3
AS2( mov ebx, [WORD_REG(dx)+5*4]) // 4,5,6,7
@@ -568,13 +574,13 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
// load table into cache
AS2( xor WORD_REG(ax), WORD_REG(ax))
ASL(9)
- AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
AS2( add WORD_REG(ax), WORD_REG(di))
- AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
AS2( add WORD_REG(ax), WORD_REG(di))
- AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
AS2( add WORD_REG(ax), WORD_REG(di))
- AS2( mov ebp, [WORD_REG(si)+WORD_REG(ax)])
+ AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
AS2( add WORD_REG(ax), WORD_REG(di))
AS2( cmp WORD_REG(ax), 2048)
ASJ( jl, 9, b)
@@ -584,12 +590,12 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
ASJ( jz, 8, f)
// counter mode one-time setup
- AS2( mov WORD_REG(bp), [L_INBLOCKS])
- AS2( movdqu xmm2, [WORD_REG(bp)]) // counter
+ AS2( mov WORD_REG(si), [L_INBLOCKS])
+ AS2( movdqu xmm2, [WORD_REG(si)]) // counter
AS2( pxor xmm2, xmm1)
AS2( psrldq xmm1, 14)
AS2( movd eax, xmm1)
- AS2( mov al, BYTE PTR [WORD_REG(bp)+15])
+ AS2( mov al, BYTE PTR [WORD_REG(si)+15])
AS2( MOVD MM(2), eax)
#if CRYPTOPP_BOOL_X86
AS2( mov eax, 1)
@@ -666,8 +672,8 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
ASL(8)
AS2( mov WORD_REG(ax), [L_INBLOCKS])
AS2( movdqu xmm2, [WORD_REG(ax)])
- AS2( mov WORD_REG(bp), [L_INXORBLOCKS])
- AS2( movdqu xmm5, [WORD_REG(bp)])
+ AS2( mov WORD_REG(si), [L_INXORBLOCKS])
+ AS2( movdqu xmm5, [WORD_REG(si)])
AS2( pxor xmm2, xmm1)
AS2( pxor xmm2, xmm5)
@@ -722,8 +728,8 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
#else
AS2( add MM(2), 1)
#endif
- // remaining part of second round, in: edx(previous round),ebp(keyed counter byte) eax,ebx,[L_SAVED_X+2*4],[L_SAVED_X+3*4], out: eax,ebx,ecx,edx
- AS2( xor edx, DWORD PTR [WORD_REG(si)+WORD_REG(cx)*8+3])
+ // remaining part of second round, in: edx(previous round),esi(keyed counter byte) eax,ebx,[L_SAVED_X+2*4],[L_SAVED_X+3*4], out: eax,ebx,ecx,edx
+ AS2( xor edx, DWORD PTR [AS_REG_7+WORD_REG(cx)*8+3])
XOR( ebx, dl, 3)
MOV( ecx, dh, 2)
AS2( shr edx, 16)
@@ -785,10 +791,10 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
AS2( sub L_REG, 16*16)
#define LAST(a, b, c) \
- AS2( movzx ebp, a )\
- AS2( movzx edi, BYTE PTR [WORD_REG(si)+WORD_REG(bp)*8+1] )\
- AS2( movzx ebp, b )\
- AS2( xor edi, DWORD PTR [WORD_REG(si)+WORD_REG(bp)*8+0] )\
+ AS2( movzx esi, a )\
+ AS2( movzx edi, BYTE PTR [AS_REG_7+WORD_REG(si)*8+1] )\
+ AS2( movzx esi, b )\
+ AS2( xor edi, DWORD PTR [AS_REG_7+WORD_REG(si)*8+0] )\
AS2( mov WORD PTR [L_LASTROUND+c], di )\
// last round
@@ -833,39 +839,58 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
ASJ( jnz, 1, b)
#if CRYPTOPP_BOOL_X64
AS2( movdqa xmm0, [L_INCREMENTS])
- AS2( paddd xmm0, [L_INBLOCKS])
+ AS2( paddq xmm0, [L_INBLOCKS])
AS2( movdqa [L_INBLOCKS], xmm0)
#endif
ASJ( jmp, 3, b)
ASL(7)
+ // erase keys on stack
+ AS2( xorps xmm0, xmm0)
+ AS2( lea WORD_REG(ax), [L_SUBKEYS+7*16])
+ AS2( movaps [WORD_REG(ax)-7*16], xmm0)
+ AS2( movaps [WORD_REG(ax)-6*16], xmm0)
+ AS2( movaps [WORD_REG(ax)-5*16], xmm0)
+ AS2( movaps [WORD_REG(ax)-4*16], xmm0)
+ AS2( movaps [WORD_REG(ax)-3*16], xmm0)
+ AS2( movaps [WORD_REG(ax)-2*16], xmm0)
+ AS2( movaps [WORD_REG(ax)-1*16], xmm0)
+ AS2( movaps [WORD_REG(ax)+0*16], xmm0)
+ AS2( movaps [WORD_REG(ax)+1*16], xmm0)
+ AS2( movaps [WORD_REG(ax)+2*16], xmm0)
+ AS2( movaps [WORD_REG(ax)+3*16], xmm0)
+ AS2( movaps [WORD_REG(ax)+4*16], xmm0)
+ AS2( movaps [WORD_REG(ax)+5*16], xmm0)
+ AS2( movaps [WORD_REG(ax)+6*16], xmm0)
#if CRYPTOPP_BOOL_X86
AS2( mov esp, [L_SP])
AS1( emms)
-#else
- AS2( mov rbp, [L_BP])
#endif
AS_POP_IF86(bp)
#if !defined(_MSC_VER) || (_MSC_VER < 1300)
AS_POP_IF86(bx)
#endif
-#ifndef __GNUC__
+#if defined(_MSC_VER) && CRYPTOPP_BOOL_X86
AS_POP_IF86(di)
AS_POP_IF86(si)
+ AS1(ret)
#endif
#ifdef CRYPTOPP_GENERATE_X64_MASM
pop r12
- pop rbp
pop rbx
pop rdi
pop rsi
ret
Rijndael_Enc_AdvancedProcessBlocks ENDP
-#else
- AS1( ret)
#endif
#ifdef __GNUC__
".att_syntax prefix;"
+ :
+ : "c" (locals), "d" (k), "S" (Te), "D" (g_cacheLineSize)
+ : "memory", "cc", "%eax"
+ #if CRYPTOPP_BOOL_X64
+ , "%rbx", "%r8", "%r9", "%r10", "%r11", "%r12"
+ #endif
);
#endif
}
@@ -908,6 +933,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
size_t regSpill, lengthAndCounterFlag, keysBegin;
};
+ size_t increment = BLOCKSIZE;
const byte* zeros = (byte *)(Te+256);
byte *space;
@@ -917,6 +943,15 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
}
while (AliasedWithTable(space, space+sizeof(Locals)));
+ if (flags & BT_ReverseDirection)
+ {
+ assert(length % BLOCKSIZE == 0);
+ inBlocks += length - BLOCKSIZE;
+ xorBlocks += length - BLOCKSIZE;
+ outBlocks += length - BLOCKSIZE;
+ increment = 0-increment;
+ }
+
Locals &locals = *(Locals *)space;
locals.inBlocks = inBlocks;
@@ -924,29 +959,16 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks;
locals.outBlocks = outBlocks;
- locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : BLOCKSIZE;
- locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? BLOCKSIZE : 0;
- locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : BLOCKSIZE;
- locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : BLOCKSIZE;
+ locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
+ locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? increment : 0;
+ locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : increment;
+ locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
locals.lengthAndCounterFlag = length - (length%16) - bool(flags & BT_InBlockIsCounter);
int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2);
locals.keysBegin = (12-keysToCopy)*16;
- #ifdef __GNUC__
- __asm__ __volatile__
- (
- AS1(call Rijndael_Enc_AdvancedProcessBlocks)
- :
- : "c" (&locals), "d" (m_key.begin()), "S" (Te), "D" (g_cacheLineSize)
- : "memory", "cc", "%eax"
- #if CRYPTOPP_BOOL_X64
- , "%rbx", "%r8", "%r10", "%r11", "%r12"
- #endif
- );
- #else
- Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);
- #endif
+ Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);
return length%16;
}
else
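
The last hunk above adds BT_ReverseDirection handling to Rijndael::Enc::AdvancedProcessBlocks: when the flag is set, the input, xor and output pointers are first moved to the final block and the per-block increment is negated (via unsigned wraparound of 0-increment), so the same loop then walks the buffers back to front. A minimal sketch of that pointer setup, assuming length is a whole number of blocks and using hypothetical names (process_blocks_example, process_one_block) with a signed step for clarity, could look like:

#include <cstddef>

enum { BLOCKSIZE = 16 };

// Stand-in for the real per-block cipher call.
static void process_one_block(unsigned char *out, const unsigned char *in)
{
    for (int i = 0; i < BLOCKSIZE; i++)
        out[i] = in[i];
}

void process_blocks_example(unsigned char *out, const unsigned char *in,
                            std::size_t length, bool reverse)
{
    // The patch keeps the step as a size_t and relies on 0-increment
    // wrapping around; a ptrdiff_t makes the same idea explicit here.
    std::ptrdiff_t step = BLOCKSIZE;
    if (reverse)                         // mirrors the BT_ReverseDirection branch
    {
        in  += length - BLOCKSIZE;       // start at the last whole block
        out += length - BLOCKSIZE;
        step = -step;                    // walk backwards one block at a time
    }
    for (std::size_t done = 0; done < length; done += BLOCKSIZE)
    {
        process_one_block(out, in);
        in  += step;
        out += step;
    }
}
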