From df1ffe1e41f89222c379d982e543c2a32da78cbd Mon Sep 17 00:00:00 2001 From: weidai Date: Fri, 4 May 2007 15:24:09 +0000 Subject: fix compile for x64, DLL and VC 6 git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@332 57ff6487-cd31-0410-9ec3-f628ee90f5f0 --- integer.cpp | 389 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 270 insertions(+), 119 deletions(-) (limited to 'integer.cpp') diff --git a/integer.cpp b/integer.cpp index 64f3cea..a8e7881 100644 --- a/integer.cpp +++ b/integer.cpp @@ -18,7 +18,7 @@ #include -#if defined(_MSC_VER) && _MSC_VER >= 1400 +#if _MSC_VER >= 1400 #include #endif @@ -30,6 +30,8 @@ #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.") #endif +#define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86) + NAMESPACE_BEGIN(CryptoPP) bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt) @@ -99,7 +101,36 @@ static word AtomicInverseModPower2(word A) // ******************************************************** -#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE +#if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || CRYPTOPP_BOOL_X64 + #define Declare2Words(x) word x##0, x##1; + #define AssignWord(a, b) a##0 = b; a##1 = 0; + #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); + #define LowWord(a) a##0 + #define HighWord(a) a##1 + #ifdef _MSC_VER + #define MultiplyWords(p, a, b) p##0 = _umul128(a, b, &p##1); + #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2; + #elif defined(__DECCXX) + #define MultiplyWords(p, a, b) p##0 = a*b; p##1 = asm("umulh %a0, %a1, %v0", a, b); + #elif CRYPTOPP_BOOL_X64 + #define MultiplyWords(p, a, b) asm ("mulq %3" : "=a"(p##0), "=d"(p##1) : "a"(a), "g"(b) : "cc"); + #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); + #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); + #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc"); + #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); + #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc"); + #endif + #ifndef Double3Words + #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2; + #endif + #ifndef Acc2WordsBy2 + #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1; + #endif + #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (ta) + (u##0>t);} + #define GetCarry(u) u##1 + #define GetBorrow(u) u##1 +#else #define Declare2Words(x) dword x; #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) #define MultiplyWords(p, a, b) p = __emulu(a, b); @@ -108,34 +139,23 @@ static word AtomicInverseModPower2(word A) #endif #define AssignWord(a, b) a = b; #define Add2WordsBy1(a, b, c) a = b + c; - #define Acc2WordsBy1(a, b) a += b; #define Acc2WordsBy2(a, b) a += b; - #define LowWord(a) (word)a - #define HighWord(a) (word)(a>>WORD_BITS) - #define Double2Words(a) a += a; + #define LowWord(a) word(a) + #define HighWord(a) word(a>>WORD_BITS) + #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2; #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u); #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u); #define GetCarry(u) HighWord(u) #define GetBorrow(u) word(u>>(WORD_BITS*2-1)) -#else - #define Declare2Words(x) word x##0, x##1; - #define AssignWord(a, b) a##0 = b; a##1 = 0; - #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); +#endif +#ifndef MulAcc + #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p)); +#endif +#ifndef Acc2WordsBy1 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b) - #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1; - #define LowWord(a) a##0 - #define HighWord(a) a##1 - #ifdef _MSC_VER - #define MultiplyWords(p, a, b) p##0 = _umul128(a, b, &p##1); - #define Double2Words(a) a##1 = __shiftleft128(a##0, a##1, 1); a##0 += a##0; - #elif defined(__DECCXX) - #define MultiplyWords(p, a, b) p##0 = a*b; p##1 = asm("umulh %a0, %a1, %v0", a, b); - #define Double2Words(a) a##1 = (a##1 + a##1) + (a##0 >> (WORD_BITS-1)); a##0 += a##0; - #endif - #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (ta) + (u##0>t);} - #define GetCarry(u) u##1 - #define GetBorrow(u) u##1 +#endif +#ifndef Acc3WordsBy2 + #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e)); #endif class DWord @@ -411,9 +431,8 @@ inline word DWord::operator%(word a) // use some tricks to share assembly code between MSVC and GCC #if defined(__GNUC__) - #define CRYPTOPP_NAKED #define AddPrologue \ - word32 result; \ + word result; \ __asm__ __volatile__ \ ( \ ".intel_syntax noprefix;" @@ -454,7 +473,6 @@ inline word DWord::operator%(word a) : "memory", "cc" \ ); #else - #define CRYPTOPP_NAKED __declspec(naked) #define AddPrologue \ __asm push edi \ __asm push esi \ @@ -464,33 +482,107 @@ inline word DWord::operator%(word a) __asm pop esi \ __asm pop edi \ __asm ret 8 +#if _MSC_VER < 1300 + #define SaveEBX __asm push ebx + #define RestoreEBX __asm pop ebx +#else + #define SaveEBX + #define RestoreEBX +#endif #define SquPrologue \ AS2( mov eax, A) \ AS2( mov ecx, C) \ + SaveEBX \ AS2( lea ebx, s_maskLow16) - #define SquEpilogue #define MulPrologue \ AS2( mov eax, A) \ AS2( mov edi, B) \ AS2( mov ecx, C) \ + SaveEBX \ AS2( lea ebx, s_maskLow16) - #define MulEpilogue #define TopPrologue \ AS2( mov eax, A) \ AS2( mov edi, B) \ AS2( mov ecx, C) \ AS2( mov esi, L) \ + SaveEBX \ AS2( lea ebx, s_maskLow16) - #define TopEpilogue + #define SquEpilogue RestoreEBX + #define MulEpilogue RestoreEBX + #define TopEpilogue RestoreEBX #endif -#if defined(_MSC_VER) && defined(_M_X64) +#ifdef CRYPTOPP_X64_MASM_AVAILABLE extern "C" { -int Baseline_Add(size_t N, word *C, const word *A, const word *B); -int Baseline_Sub(size_t N, word *C, const word *A, const word *B); +word Baseline_Add(size_t N, word *C, const word *A, const word *B); +word Baseline_Sub(size_t N, word *C, const word *A, const word *B); +} +#elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) +word Baseline_Add(size_t N, word *C, const word *A, const word *B) +{ + word result; + __asm__ __volatile__ + ( + ".intel_syntax;" + AS1( neg %1) + ASJ( jz, 1, f) + AS2( mov %0,[%3+8*%1]) + AS2( add %0,[%4+8*%1]) + AS2( mov [%2+8*%1],%0) + ASL(0) + AS2( mov %0,[%3+8*%1+8]) + AS2( adc %0,[%4+8*%1+8]) + AS2( mov [%2+8*%1+8],%0) + AS2( lea %1,[%1+2]) + ASJ( jrcxz, 1, f) + AS2( mov %0,[%3+8*%1]) + AS2( adc %0,[%4+8*%1]) + AS2( mov [%2+8*%1],%0) + ASJ( jmp, 0, b) + ASL(1) + AS2( mov %0, 0) + AS2( adc %0, %0) + ".att_syntax;" + : "=&r" (result) + : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N) + : "memory", "cc" + ); + return result; } -#elif defined(CRYPTOPP_X86_ASM_AVAILABLE) -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) + +word Baseline_Sub(size_t N, word *C, const word *A, const word *B) +{ + word result; + __asm__ __volatile__ + ( + ".intel_syntax;" + AS1( neg %1) + ASJ( jz, 1, f) + AS2( mov %0,[%3+8*%1]) + AS2( sub %0,[%4+8*%1]) + AS2( mov [%2+8*%1],%0) + ASL(0) + AS2( mov %0,[%3+8*%1+8]) + AS2( sbb %0,[%4+8*%1+8]) + AS2( mov [%2+8*%1+8],%0) + AS2( lea %1,[%1+2]) + ASJ( jrcxz, 1, f) + AS2( mov %0,[%3+8*%1]) + AS2( sbb %0,[%4+8*%1]) + AS2( mov [%2+8*%1],%0) + ASJ( jmp, 0, b) + ASL(1) + AS2( mov %0, 0) + AS2( adc %0, %0) + ".att_syntax;" + : "=&r" (result) + : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N) + : "memory", "cc" + ); + return result; +} +#elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86 +CRYPTOPP_NAKED word CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) { AddPrologue @@ -531,7 +623,7 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word AddEpilogue } -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) +CRYPTOPP_NAKED word CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) { AddPrologue @@ -572,8 +664,8 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word AddEpilogue } -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B) +#if CRYPTOPP_INTEGER_SSE2 +CRYPTOPP_NAKED word CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B) { AddPrologue @@ -629,7 +721,7 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, AddEpilogue } -CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B) +CRYPTOPP_NAKED word CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B) { AddPrologue @@ -687,7 +779,7 @@ CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, } #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE #else -int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) +word CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) { assert (N%2 == 0); @@ -703,7 +795,7 @@ int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word return int(GetCarry(u)); } -int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) +word CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) { assert (N%2 == 0); @@ -737,7 +829,7 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) #define Mul_2 \ Mul_Begin(2) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ - Mul_End(2) + Mul_End(1, 1) #define Mul_4 \ Mul_Begin(4) \ @@ -746,7 +838,7 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \ - Mul_End(4) + Mul_End(5, 3) #define Mul_8 \ Mul_Begin(8) \ @@ -763,7 +855,7 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \ - Mul_End(8) + Mul_End(13, 7) #define Mul_16 \ Mul_Begin(16) \ @@ -796,7 +888,7 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \ - Mul_End(16) + Mul_End(29, 15) #define Squ_2 \ Squ_Begin(2) \ @@ -900,6 +992,7 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ Bot_End(16) +#if 0 #define Mul_Begin(n) \ Declare2Words(p) \ Declare2Words(c) \ @@ -938,9 +1031,7 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) #define Bot_End(n) \ R[n-1] = e; - -/* -// this is slower on MSVC 2005 Win32 +#else #define Mul_Begin(n) \ Declare2Words(p) \ word c; \ @@ -950,25 +1041,20 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) AssignWord(d, HighWord(p)) #define Mul_Acc(i, j) \ - MultiplyWords(p, A[i], B[j]) \ - Acc2WordsBy1(p, c) \ - c = LowWord(p); \ - Acc2WordsBy1(d, HighWord(p)) + MulAcc(c, d, A[i], B[j]) #define Mul_SaveAcc(k, i, j) \ R[k] = c; \ - MultiplyWords(p, A[i], B[j]) \ - Acc2WordsBy1(p, LowWord(d)) \ - c = LowWord(p); \ + c = LowWord(d); \ AssignWord(d, HighWord(d)) \ - Acc2WordsBy1(d, HighWord(p)) + MulAcc(c, d, A[i], B[j]) -#define Mul_End(n) \ - R[2*n-3] = c; \ - MultiplyWords(p, A[n-1], B[n-1])\ - Acc2WordsBy2(d, p) \ - R[2*n-2] = LowWord(d); \ - R[2*n-1] = HighWord(d); +#define Mul_End(k, i) \ + R[k] = c; \ + MultiplyWords(p, A[i], B[i]) \ + Acc2WordsBy2(p, d) \ + R[k+1] = LowWord(p); \ + R[k+2] = HighWord(p); #define Bot_SaveAcc(k, i, j) \ R[k] = c; \ @@ -980,52 +1066,45 @@ static word LinearMultiply(word *C, const word *A, word B, size_t N) #define Bot_End(n) \ R[n-1] = c; -*/ +#endif #define Squ_Begin(n) \ Declare2Words(p) \ - Declare2Words(c) \ + word c; \ Declare2Words(d) \ Declare2Words(e) \ MultiplyWords(p, A[0], A[0]) \ R[0] = LowWord(p); \ AssignWord(e, HighWord(p)) \ MultiplyWords(p, A[0], A[1]) \ - AssignWord(c, LowWord(p)) \ + c = LowWord(p); \ AssignWord(d, HighWord(p)) \ Squ_NonDiag \ #define Squ_NonDiag \ - Double2Words(c) \ - Double2Words(d) \ + Double3Words(c, d) #define Squ_SaveAcc(k, i, j) \ - Acc2WordsBy2(c, e) \ - R[k] = LowWord(c); \ - Add2WordsBy1(e, d, HighWord(c)) \ + Acc3WordsBy2(c, d, e) \ + R[k] = c; \ MultiplyWords(p, A[i], A[j]) \ - AssignWord(c, LowWord(p)) \ + c = LowWord(p); \ AssignWord(d, HighWord(p)) \ #define Squ_Acc(i, j) \ - MultiplyWords(p, A[i], A[j]) \ - Acc2WordsBy1(c, LowWord(p)) \ - Acc2WordsBy1(d, HighWord(p)) + MulAcc(c, d, A[i], A[j]) #define Squ_Diag(i) \ Squ_NonDiag \ - MultiplyWords(p, A[i], A[i]) \ - Acc2WordsBy1(c, LowWord(p)) \ - Acc2WordsBy1(d, HighWord(p)) \ + MulAcc(c, d, A[i], A[i]) #define Squ_End(n) \ - Acc2WordsBy2(c, e) \ - R[2*n-3] = LowWord(c); \ - Acc2WordsBy1(d, HighWord(c)) \ + Acc3WordsBy2(c, d, e) \ + R[2*n-3] = c; \ MultiplyWords(p, A[n-1], A[n-1])\ - Acc2WordsBy2(d, p) \ - R[2*n-2] = LowWord(d); \ - R[2*n-1] = HighWord(d); + Acc2WordsBy2(p, e) \ + R[2*n-2] = LowWord(p); \ + R[2*n-1] = HighWord(p); void Baseline_Multiply2(word *R, const word *A, const word *B) { @@ -1072,7 +1151,62 @@ void Baseline_MultiplyBottom8(word *R, const word *A, const word *B) Bot_8 } -/* +#define Top_Begin(n) \ + Declare2Words(p) \ + word c; \ + Declare2Words(d) \ + MultiplyWords(p, A[0], B[n-2]);\ + AssignWord(d, HighWord(p)); + +#define Top_Acc(i, j) \ + MultiplyWords(p, A[i], B[j]);\ + Acc2WordsBy1(d, HighWord(p)); + +#define Top_SaveAcc0(i, j) \ + c = LowWord(d); \ + AssignWord(d, HighWord(d)) \ + MulAcc(c, d, A[i], B[j]) + +#define Top_SaveAcc1(i, j) \ + c = L=2 && N%2==0); -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - if (HasSSE2() && ((N>=8) & (N<=32))) - s_pTop[N/16](R, A, B, L[N-1]); - else -#endif - if (N<=4) - { - s_pMul[N/4](T, A, B); - memcpy(R, T+N, N*WORD_SIZE); - } + if (N <= s_recursionLimit) + s_pTop[N/4](R, A, B, L[N-1]); else { const size_t N2 = N/2; @@ -3076,13 +3234,6 @@ public: memcpy(m_counterAndSeed + 4, seed, seedSize); } - byte GenerateByte() - { - byte b; - GenerateBlock(&b, 1); - return b; - } - void GenerateBlock(byte *output, size_t size) { PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter); -- cgit v1.2.1