author    weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>    2007-05-04 15:24:09 +0000
committer weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0>    2007-05-04 15:24:09 +0000
commit    df1ffe1e41f89222c379d982e543c2a32da78cbd (patch)
tree      3ddcd92ac078642dfed5375980dc2db4006d1498 /rijndael.cpp
parent    1bb93ea95e3131c2b9a4f8f8bd3f3197e4d52a76 (diff)
download  cryptopp-df1ffe1e41f89222c379d982e543c2a32da78cbd.tar.gz
fix compile for x64, DLL and VC 6
git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@332 57ff6487-cd31-0410-9ec3-f628ee90f5f0
Diffstat (limited to 'rijndael.cpp')
-rw-r--r--  rijndael.cpp | 254
1 file changed, 158 insertions, 96 deletions
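
The patch below replaces hard-coded 32-bit register names in the Rijndael round assembly with macros (K_REG, WORD_REG, SAVE_*/RESTORE_*) that expand to different registers on x86 and x64. As a rough sketch of the idea, under the assumption that WORD_REG pastes the register suffix onto a width prefix (the real definition lives elsewhere in the library, e.g. cpu.h, so the expansions here are illustrative, not the shipped code):

    // Hypothetical sketch only; the actual definitions are in Crypto++'s headers.
    #if CRYPTOPP_BOOL_X64
        #define WORD_REG(x)  r##x              // WORD_REG(si) -> rsi
        #define K_REG        r8                // key pointer in a spare x64 GPR
        #define SAVE_0(x)    AS2(mov r10d, x)  // spill state to extra GPRs r10-r12
        #define RESTORE_0(x) AS2(mov x, r10d)
    #else
        #define WORD_REG(x)  e##x              // WORD_REG(si) -> esi
        #define K_REG        esi               // x86 is register-starved,
        #define SAVE_0(x)    AS2(movd mm0, x)  // so spill to MMX registers instead
        #define RESTORE_0(x) AS2(movd x, mm0)
    #endif

On x64 the extra general-purpose registers r8-r12 take over the role of the MMX spill slots, which is why the epilogue below pops r10-r12 instead of executing emms.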
diff --git a/rijndael.cpp b/rijndael.cpp
index 4a8572f..ac4f769 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -149,81 +149,133 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
-#ifdef CRYPTOPP_X86_ASM_AVAILABLE
+#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
if (HasMMX())
{
const word32 *k = m_key;
const word32 *kLoopEnd = k + m_rounds*4;
+ #if CRYPTOPP_BOOL_X64
+ #define K_REG r8
+ #define K_END_REG r9
+ #define SAVE_K
+ #define RESTORE_K
+ #define RESTORE_K_END
+ #define SAVE_0(x) AS2(mov r10d, x)
+ #define SAVE_1(x) AS2(mov r11d, x)
+ #define SAVE_2(x) AS2(mov r12d, x)
+ #define RESTORE_0(x) AS2(mov x, r10d)
+ #define RESTORE_1(x) AS2(mov x, r11d)
+ #define RESTORE_2(x) AS2(mov x, r12d)
+ #else
+ #define K_REG esi
+ #define K_END_REG edi
+ #define SAVE_K AS2(movd mm4, esi)
+ #define RESTORE_K AS2(movd esi, mm4)
+ #define RESTORE_K_END AS2(movd edi, mm5)
+ #define SAVE_0(x) AS2(movd mm0, x)
+ #define SAVE_1(x) AS2(movd mm1, x)
+ #define SAVE_2(x) AS2(movd mm2, x)
+ #define RESTORE_0(x) AS2(movd x, mm0)
+ #define RESTORE_1(x) AS2(movd x, mm1)
+ #define RESTORE_2(x) AS2(movd x, mm2)
+ #endif
#ifdef __GNUC__
word32 t0, t1, t2, t3;
__asm__ __volatile__
(
".intel_syntax noprefix;"
- AS1( push ebx)
- AS1( push ebp)
- AS2( mov ebp, eax)
+ AS_PUSH( bx)
+ AS_PUSH( bp)
+ AS2( mov WORD_REG(bp), WORD_REG(ax))
+ #if CRYPTOPP_BOOL_X64
+ // save these manually. clobber list doesn't seem to work as of GCC 4.1.0
+ AS1( pushq K_REG)
+ AS1( pushq K_END_REG)
+ AS1( pushq r10)
+ AS1( pushq r11)
+ AS1( pushq r12)
+ AS2( mov K_REG, rsi)
+ AS2( mov K_END_REG, rcx)
+ #else
AS2( movd mm5, ecx)
+ #endif
#else
+ #if _MSC_VER < 1300
+ const word32 *t = Te;
+ AS2( mov eax, t)
+ #endif
AS2( mov edx, g_cacheLineSize)
- AS2( mov edi, inBlock)
- AS2( mov esi, k)
+ AS2( mov WORD_REG(di), inBlock)
+ AS2( mov K_REG, k)
AS2( movd mm5, kLoopEnd)
- AS1( push ebp)
+ #if _MSC_VER < 1300
+ AS_PUSH( bx)
+ AS_PUSH( bp)
+ AS2( mov ebp, eax)
+ #else
+ AS_PUSH( bp)
AS2( lea ebp, Te)
+ #endif
#endif
- AS2( mov eax, [esi+0*4]) // s0
- AS2( xor eax, [edi+0*4])
- AS2( movd mm0, eax)
- AS2( mov ebx, [esi+1*4])
- AS2( xor ebx, [edi+1*4])
- AS2( movd mm1, ebx)
+ AS2( mov eax, [K_REG+0*4]) // s0
+ AS2( xor eax, [WORD_REG(di)+0*4])
+ SAVE_0(eax)
+ AS2( mov ebx, [K_REG+1*4])
+ AS2( xor ebx, [WORD_REG(di)+1*4])
+ SAVE_1(ebx)
AS2( and ebx, eax)
- AS2( mov eax, [esi+2*4])
- AS2( xor eax, [edi+2*4])
- AS2( movd mm2, eax)
+ AS2( mov eax, [K_REG+2*4])
+ AS2( xor eax, [WORD_REG(di)+2*4])
+ SAVE_2(eax)
AS2( and ebx, eax)
- AS2( mov ecx, [esi+3*4])
- AS2( xor ecx, [edi+3*4])
+ AS2( mov ecx, [K_REG+3*4])
+ AS2( xor ecx, [WORD_REG(di)+3*4])
AS2( and ebx, ecx)
	// read Te0 into L1 cache. this code could be simplified by using lfence, but that is an SSE2 instruction
AS2( and ebx, 0)
AS2( mov edi, ebx) // make index depend on previous loads to simulate lfence
ASL(2)
- AS2( and ebx, [ebp+edi])
+ AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])
AS2( add edi, edx)
- AS2( and ebx, [ebp+edi])
+ AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])
AS2( add edi, edx)
- AS2( and ebx, [ebp+edi])
+ AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])
AS2( add edi, edx)
- AS2( and ebx, [ebp+edi])
+ AS2( and ebx, [WORD_REG(bp)+WORD_REG(di)])
AS2( add edi, edx)
AS2( cmp edi, 1024)
ASJ( jl, 2, b)
- AS2( and ebx, [ebp+1020])
+ AS2( and ebx, [WORD_REG(bp)+1020])
+#if CRYPTOPP_BOOL_X64
+ AS2( xor r10d, ebx)
+ AS2( xor r11d, ebx)
+ AS2( xor r12d, ebx)
+#else
AS2( movd mm6, ebx)
AS2( pxor mm2, mm6)
AS2( pxor mm1, mm6)
AS2( pxor mm0, mm6)
+#endif
AS2( xor ecx, ebx)
- AS2( mov edi, [esi+4*4]) // t0
- AS2( mov eax, [esi+5*4])
- AS2( mov ebx, [esi+6*4])
- AS2( mov edx, [esi+7*4])
- AS2( add esi, 8*4)
- AS2( movd mm4, esi)
+ AS2( mov edi, [K_REG+4*4]) // t0
+ AS2( mov eax, [K_REG+5*4])
+ AS2( mov ebx, [K_REG+6*4])
+ AS2( mov edx, [K_REG+7*4])
+ AS2( add K_REG, 8*4)
+ SAVE_K
#define QUARTER_ROUND(t, a, b, c, d) \
AS2(movzx esi, t##l)\
- AS2(d, [ebp+0*1024+4*esi])\
+ AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])\
AS2(movzx esi, t##h)\
- AS2(c, [ebp+1*1024+4*esi])\
+ AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\
AS2(shr e##t##x, 16)\
AS2(movzx esi, t##l)\
- AS2(b, [ebp+2*1024+4*esi])\
+ AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\
AS2(movzx esi, t##h)\
- AS2(a, [ebp+3*1024+4*esi])
+ AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])
#define s0 xor edi
#define s1 xor eax
@@ -235,69 +287,69 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
#define t3 xor edx
QUARTER_ROUND(c, t0, t1, t2, t3)
- AS2( movd ecx, mm2)
+ RESTORE_2(ecx)
QUARTER_ROUND(c, t3, t0, t1, t2)
- AS2( movd ecx, mm1)
+ RESTORE_1(ecx)
QUARTER_ROUND(c, t2, t3, t0, t1)
- AS2( movd ecx, mm0)
+ RESTORE_0(ecx)
QUARTER_ROUND(c, t1, t2, t3, t0)
- AS2( movd mm2, ebx)
- AS2( movd mm1, eax)
- AS2( movd mm0, edi)
+ SAVE_2(ebx)
+ SAVE_1(eax)
+ SAVE_0(edi)
#undef QUARTER_ROUND
- AS2( movd esi, mm4)
+ RESTORE_K
ASL(0)
- AS2( mov edi, [esi+0*4])
- AS2( mov eax, [esi+1*4])
- AS2( mov ebx, [esi+2*4])
- AS2( mov ecx, [esi+3*4])
+ AS2( mov edi, [K_REG+0*4])
+ AS2( mov eax, [K_REG+1*4])
+ AS2( mov ebx, [K_REG+2*4])
+ AS2( mov ecx, [K_REG+3*4])
#define QUARTER_ROUND(t, a, b, c, d) \
AS2(movzx esi, t##l)\
- AS2(a, [ebp+3*1024+4*esi])\
+ AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])\
AS2(movzx esi, t##h)\
- AS2(b, [ebp+2*1024+4*esi])\
+ AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\
AS2(shr e##t##x, 16)\
AS2(movzx esi, t##l)\
- AS2(c, [ebp+1*1024+4*esi])\
+ AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\
AS2(movzx esi, t##h)\
- AS2(d, [ebp+0*1024+4*esi])
+ AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])
QUARTER_ROUND(d, s0, s1, s2, s3)
- AS2( movd edx, mm2)
+ RESTORE_2(edx)
QUARTER_ROUND(d, s3, s0, s1, s2)
- AS2( movd edx, mm1)
+ RESTORE_1(edx)
QUARTER_ROUND(d, s2, s3, s0, s1)
- AS2( movd edx, mm0)
+ RESTORE_0(edx)
QUARTER_ROUND(d, s1, s2, s3, s0)
- AS2( movd esi, mm4)
- AS2( movd mm2, ebx)
- AS2( movd mm1, eax)
- AS2( movd mm0, edi)
+ RESTORE_K
+ SAVE_2(ebx)
+ SAVE_1(eax)
+ SAVE_0(edi)
- AS2( mov edi, [esi+4*4])
- AS2( mov eax, [esi+5*4])
- AS2( mov ebx, [esi+6*4])
- AS2( mov edx, [esi+7*4])
+ AS2( mov edi, [K_REG+4*4])
+ AS2( mov eax, [K_REG+5*4])
+ AS2( mov ebx, [K_REG+6*4])
+ AS2( mov edx, [K_REG+7*4])
QUARTER_ROUND(c, t0, t1, t2, t3)
- AS2( movd ecx, mm2)
+ RESTORE_2(ecx)
QUARTER_ROUND(c, t3, t0, t1, t2)
- AS2( movd ecx, mm1)
+ RESTORE_1(ecx)
QUARTER_ROUND(c, t2, t3, t0, t1)
- AS2( movd ecx, mm0)
+ RESTORE_0(ecx)
QUARTER_ROUND(c, t1, t2, t3, t0)
- AS2( movd mm2, ebx)
- AS2( movd mm1, eax)
- AS2( movd mm0, edi)
-
- AS2( movd esi, mm4)
- AS2( movd edi, mm5)
- AS2( add esi, 8*4)
- AS2( movd mm4, esi)
- AS2( cmp edi, esi)
+ SAVE_2(ebx)
+ SAVE_1(eax)
+ SAVE_0(edi)
+
+ RESTORE_K
+ RESTORE_K_END
+ AS2( add K_REG, 8*4)
+ SAVE_K
+ AS2( cmp K_END_REG, K_REG)
ASJ( jne, 0, b)
#undef QUARTER_ROUND
@@ -310,44 +362,54 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
#undef t2
#undef t3
- AS2( mov eax, [edi+0*4])
- AS2( mov ecx, [edi+1*4])
- AS2( mov esi, [edi+2*4])
- AS2( mov edi, [edi+3*4])
+ AS2( mov eax, [K_END_REG+0*4])
+ AS2( mov ecx, [K_END_REG+1*4])
+ AS2( mov esi, [K_END_REG+2*4])
+ AS2( mov edi, [K_END_REG+3*4])
#define QUARTER_ROUND(a, b, c, d) \
AS2( movzx ebx, dl)\
- AS2( movzx ebx, BYTE PTR [ebp+1+4*ebx])\
+ AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
AS2( shl ebx, 3*8)\
AS2( xor a, ebx)\
AS2( movzx ebx, dh)\
- AS2( movzx ebx, BYTE PTR [ebp+1+4*ebx])\
+ AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
AS2( shl ebx, 2*8)\
AS2( xor b, ebx)\
AS2( shr edx, 16)\
AS2( movzx ebx, dl)\
AS2( shr edx, 8)\
- AS2( movzx ebx, BYTE PTR [ebp+1+4*ebx])\
+ AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
AS2( shl ebx, 1*8)\
AS2( xor c, ebx)\
- AS2( movzx ebx, BYTE PTR [ebp+1+4*edx])\
+ AS2( movzx ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(dx)])\
AS2( xor d, ebx)
QUARTER_ROUND(eax, ecx, esi, edi)
- AS2( movd edx, mm2)
+ RESTORE_2(edx)
QUARTER_ROUND(edi, eax, ecx, esi)
- AS2( movd edx, mm1)
+ RESTORE_1(edx)
QUARTER_ROUND(esi, edi, eax, ecx)
- AS2( movd edx, mm0)
+ RESTORE_0(edx)
QUARTER_ROUND(ecx, esi, edi, eax)
#undef QUARTER_ROUND
- AS1( pop ebp)
- AS1( emms)
+#if CRYPTOPP_BOOL_X64
+ AS1(popq r12)
+ AS1(popq r11)
+ AS1(popq r10)
+ AS1(popq K_END_REG)
+ AS1(popq K_REG)
+#else
+ AS1(emms)
+#endif
+ AS_POP( bp)
+#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
+ AS_POP( bx)
+#endif
#ifdef __GNUC__
- AS1( pop ebx)
".att_syntax prefix;"
: "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
: "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
@@ -366,19 +428,19 @@ void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
((word32 *)outBlock)[2] = t2;
((word32 *)outBlock)[3] = t3;
#else
- AS2( mov ebx, xorBlock)
- AS2( test ebx, ebx)
+ AS2( mov WORD_REG(bx), xorBlock)
+ AS2( test WORD_REG(bx), WORD_REG(bx))
ASJ( jz, 1, f)
- AS2( xor eax, [ebx+0*4])
- AS2( xor ecx, [ebx+1*4])
- AS2( xor esi, [ebx+2*4])
- AS2( xor edi, [ebx+3*4])
+ AS2( xor eax, [WORD_REG(bx)+0*4])
+ AS2( xor ecx, [WORD_REG(bx)+1*4])
+ AS2( xor esi, [WORD_REG(bx)+2*4])
+ AS2( xor edi, [WORD_REG(bx)+3*4])
ASL(1)
- AS2( mov ebx, outBlock)
- AS2( mov [ebx+0*4], eax)
- AS2( mov [ebx+1*4], ecx)
- AS2( mov [ebx+2*4], esi)
- AS2( mov [ebx+3*4], edi)
+ AS2( mov WORD_REG(bx), outBlock)
+ AS2( mov [WORD_REG(bx)+0*4], eax)
+ AS2( mov [WORD_REG(bx)+1*4], ecx)
+ AS2( mov [WORD_REG(bx)+2*4], esi)
+ AS2( mov [WORD_REG(bx)+3*4], edi)
#endif
}
else
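
For context on the table-priming loop in the first hunk (the chain of and ebx, [WORD_REG(bp)+WORD_REG(di)] loads): it walks the 1 KB Te0 table one cache line at a time so every line is resident in L1 before the data-dependent S-box lookups begin, narrowing the cache-timing side channel. A minimal C++ sketch of the same idea, with hypothetical names (PreloadTable is my illustration, not a library function; the 1024-byte table size and cache-line stride are taken from the diff):

    typedef unsigned int word32;

    // Touch one word in each cache line of a 1 KB lookup table so that
    // later data-dependent loads hit L1. volatile keeps the compiler from
    // discarding the otherwise-dead reads; the assembly version instead
    // folds each load into a zeroed accumulator ("and ebx, 0" above) and
    // chains the index through prior loads to order the reads.
    inline void PreloadTable(const word32 *table, int cacheLineSize)
    {
        volatile const word32 *p = table;
        for (int i = 0; i < 1024; i += cacheLineSize)
            (void)p[i / sizeof(word32)];
        (void)p[1020 / sizeof(word32)];   // final word, cf. [WORD_REG(bp)+1020]
    }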