diff options
author | weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0> | 2003-07-25 00:15:52 +0000 |
---|---|---|
committer | weidai <weidai@57ff6487-cd31-0410-9ec3-f628ee90f5f0> | 2003-07-25 00:15:52 +0000 |
commit | 58d9f812c149943cc18bd5b926b22fcc7b9b8b27 (patch) | |
tree | 9227398b87250aa1eca81b0853a967fe9d003103 /integer.cpp | |
parent | 60ae2279fbb3b8082a7963ce73dcd2b6b2d4a50e (diff) | |
download | cryptopp-58d9f812c149943cc18bd5b926b22fcc7b9b8b27.tar.gz |
fix bugs in 64-bit CPU support
git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@112 57ff6487-cd31-0410-9ec3-f628ee90f5f0
Diffstat (limited to 'integer.cpp')
-rw-r--r-- | integer.cpp | 743 |
1 files changed, 465 insertions, 278 deletions
diff --git a/integer.cpp b/integer.cpp index deb60f9..52042dd 100644 --- a/integer.cpp +++ b/integer.cpp @@ -60,8 +60,6 @@ void AlignedAllocator<T>::deallocate(void *p, size_type n) } #endif -#define MAKE_DWORD(lowWord, highWord) ((dword(highWord)<<WORD_BITS) | (lowWord)) - static int Compare(const word *A, const word *B, unsigned int N) { while (N--) @@ -106,31 +104,303 @@ static void TwosComplement(word *A, unsigned int N) A[i] = ~A[i]; } -static word LinearMultiply(word *C, const word *A, word B, unsigned int N) +static word AtomicInverseModPower2(word A) { - word carry=0; - for(unsigned i=0; i<N; i++) + assert(A%2==1); + + word R=A%8; + + for (unsigned i=3; i<WORD_BITS; i*=2) + R = R*(2-R*A); + + assert(R*A==1); + return R; +} + +// ******************************************************** + +class DWord +{ +public: + DWord() {} + +#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + explicit DWord(word low) { - dword p = (dword)A[i] * B + carry; - C[i] = LOW_WORD(p); - carry = HIGH_WORD(p); + m_whole = low; } - return carry; -} +#else + explicit DWord(word low) + { + m_halfs.low = low; + m_halfs.high = 0; + } +#endif + + DWord(word low, word high) + { + m_halfs.low = low; + m_halfs.high = high; + } + + static DWord Multiply(word a, word b) + { + DWord r; + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + r.m_whole = (dword)a * b; + #elif defined(__alpha__) + r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b)); + #elif defined(__ia64__) + r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b)); + #elif defined(_ARCH_PPC64) + r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc"); + #elif defined(__x86_64__) + __asm__("mulq %3" : "=r.m_halfs.high" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc"); + #elif defined(__mips64) + __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b)); + #elif defined(_M_IX86) + // for testing + word64 t = (word64)a * b; + r.m_halfs.high = ((word32 *)(&t))[1]; + r.m_halfs.low = (word32)t; + #else + #error can not implement DWord + #endif + return r; + } + + static DWord MultiplyAndAdd(word a, word b, word c) + { + DWord r = Multiply(a, b); + return r += c; + } + + DWord & operator+=(word a) + { + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + m_whole = m_whole + a; + #else + m_halfs.low += a; + m_halfs.high += (m_halfs.low < a); + #endif + return *this; + } + + DWord operator+(word a) + { + DWord r; + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + r.m_whole = m_whole + a; + #else + r.m_halfs.low = m_halfs.low + a; + r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a); + #endif + return r; + } + + DWord operator-(DWord a) + { + DWord r; + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + r.m_whole = m_whole - a.m_whole; + #else + r.m_halfs.low = m_halfs.low - a.m_halfs.low; + r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low); + #endif + return r; + } + + DWord operator-(word a) + { + DWord r; + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + r.m_whole = m_whole - a; + #else + r.m_halfs.low = m_halfs.low - a; + r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); + #endif + return r; + } + + // returns quotient, which must fit in a word + word operator/(word divisor); + + word operator%(word a); + + bool operator!() const + { + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + return !m_whole; + #else + return !m_halfs.high && !m_halfs.low; + #endif + } + + word GetLowHalf() const {return m_halfs.low;} + word GetHighHalf() const {return m_halfs.high;} + word GetHighHalfAsBorrow() const {return 0-m_halfs.high;} + +private: + union + { + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + dword m_whole; + #endif + struct + { + #ifdef IS_LITTLE_ENDIAN + word low; + word high; + #else + word high; + word low; + #endif + } m_halfs; + }; +}; -static void AtomicInverseModPower2(word *C, word A0, word A1) +class Word { - assert(A0%2==1); +public: + Word() {} - dword A=MAKE_DWORD(A0, A1), R=A0%8; + Word(word value) + { + m_whole = value; + } - for (unsigned i=3; i<2*WORD_BITS; i*=2) - R = R*(2-R*A); + Word(hword low, hword high) + { + m_whole = low | (word(high) << (WORD_BITS/2)); + } - assert(R*A==1); + static Word Multiply(hword a, hword b) + { + Word r; + r.m_whole = (word)a * b; + return r; + } + + Word operator-(Word a) + { + Word r; + r.m_whole = m_whole - a.m_whole; + return r; + } + + Word operator-(hword a) + { + Word r; + r.m_whole = m_whole - a; + return r; + } + + // returns quotient, which must fit in a word + hword operator/(hword divisor) + { + return hword(m_whole / divisor); + } + + bool operator!() const + { + return !m_whole; + } + + word GetWhole() const {return m_whole;} + hword GetLowHalf() const {return hword(m_whole);} + hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} + hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} + +private: + word m_whole; +}; - C[0] = LOW_WORD(R); - C[1] = HIGH_WORD(R); +// do a 3 word by 2 word divide, returns quotient and leaves remainder in A +template <class S, class D> +S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL) +{ + // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S + assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); + + // estimate the quotient: do a 2 S by 1 S divide + S Q; + if (S(B1+1) == 0) + Q = A[2]; + else + Q = D(A[1], A[2]) / S(B1+1); + + // now subtract Q*B from A + D p = D::Multiply(B0, Q); + D u = (D) A[0] - p.GetLowHalf(); + A[0] = u.GetLowHalf(); + u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); + + // Q <= actual quotient, so fix it + while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) + { + u = (D) A[0] - B0; + A[0] = u.GetLowHalf(); + u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); + Q++; + assert(Q); // shouldn't overflow + } + + return Q; +} + +// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 +template <class S, class D> +inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) +{ + if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) + return D(Ah.GetLowHalf(), Ah.GetHighHalf()); + else + { + S Q[2]; + T[0] = Al.GetLowHalf(); + T[1] = Al.GetHighHalf(); + T[2] = Ah.GetLowHalf(); + T[3] = Ah.GetHighHalf(); + Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf()); + Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf()); + return D(Q[0], Q[1]); + } +} + +// returns quotient, which must fit in a word +inline word DWord::operator/(word a) +{ + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + return word(m_whole / a); + #else + hword r[4]; + return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole(); + #endif +} + +inline word DWord::operator%(word a) +{ + #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE + return word(m_whole % a); + #else + if (a < (word(1) << (WORD_BITS/2))) + { + hword h = hword(a); + word r = m_halfs.high % h; + r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; + return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); + } + else + { + hword r[4]; + DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a); + return Word(r[0], r[1]).GetWhole(); + } + #endif } // ******************************************************** @@ -162,69 +432,30 @@ word Portable::Add(word *C, const word *A, const word *B, unsigned int N) { assert (N%2 == 0); -#ifdef IS_LITTLE_ENDIAN - if (sizeof(dword) == sizeof(size_t)) // dword is only register size + DWord u(0, 0); + for (unsigned int i = 0; i < N; i+=2) { - dword carry = 0; - N >>= 1; - for (unsigned int i = 0; i < N; i++) - { - dword a = ((const dword *)A)[i] + carry; - dword c = a + ((const dword *)B)[i]; - ((dword *)C)[i] = c; - carry = (a < carry) | (c < a); - } - return (word)carry; - } - else -#endif - { - word carry = 0; - for (unsigned int i = 0; i < N; i+=2) - { - dword u = (dword) carry + A[i] + B[i]; - C[i] = LOW_WORD(u); - u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1]; - C[i+1] = LOW_WORD(u); - carry = HIGH_WORD(u); - } - return carry; + u = DWord(A[i]) + B[i] + u.GetHighHalf(); + C[i] = u.GetLowHalf(); + u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf(); + C[i+1] = u.GetLowHalf(); } + return u.GetHighHalf(); } word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N) { assert (N%2 == 0); -#ifdef IS_LITTLE_ENDIAN - if (sizeof(dword) == sizeof(size_t)) // dword is only register size - { - dword borrow = 0; - N >>= 1; - for (unsigned int i = 0; i < N; i++) - { - dword a = ((const dword *)A)[i]; - dword b = a - borrow; - dword c = b - ((const dword *)B)[i]; - ((dword *)C)[i] = c; - borrow = (b > a) | (c > b); - } - return (word)borrow; - } - else -#endif + DWord u(0, 0); + for (unsigned int i = 0; i < N; i+=2) { - word borrow=0; - for (unsigned i = 0; i < N; i+=2) - { - dword u = (dword) A[i] - B[i] - borrow; - C[i] = LOW_WORD(u); - u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u)); - C[i+1] = LOW_WORD(u); - borrow = 0-HIGH_WORD(u); - } - return borrow; + u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow(); + C[i] = u.GetLowHalf(); + u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow(); + C[i+1] = u.GetLowHalf(); } + return 0-u.GetHighHalf(); } void Portable::Multiply2(word *C, const word *A, const word *B) @@ -261,38 +492,28 @@ void Portable::Multiply2(word *C, const word *A, const word *B) unsigned int ai = A[1] < A[0]; unsigned int bi = B[0] < B[1]; unsigned int di = ai & bi; - dword d = (dword)D[di]*D[di+2]; + DWord d = DWord::Multiply(D[di], D[di+2]); D[1] = D[3] = 0; unsigned int si = ai + !bi; word s = D[si]; - dword A0B0 = (dword)A[0]*B[0]; - C[0] = LOW_WORD(A0B0); + DWord A0B0 = DWord::Multiply(A[0], B[0]); + C[0] = A0B0.GetLowHalf(); - dword A1B1 = (dword)A[1]*B[1]; - dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1); - C[1] = LOW_WORD(t); + DWord A1B1 = DWord::Multiply(A[1], B[1]); + DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf(); + C[1] = t.GetLowHalf(); - t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s; - C[2] = LOW_WORD(t); - C[3] = HIGH_WORD(t); + t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s; + C[2] = t.GetLowHalf(); + C[3] = t.GetHighHalf(); } inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B) { -#ifdef IS_LITTLE_ENDIAN - if (sizeof(dword) == sizeof(size_t)) - { - dword a = *(const dword *)A, b = *(const dword *)B; - ((dword *)C)[0] = a*b; - } - else -#endif - { - dword t = (dword)A[0]*B[0]; - C[0] = LOW_WORD(t); - C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0]; - } + DWord t = DWord::Multiply(A[0], B[0]); + C[0] = t.GetLowHalf(); + C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0]; } word Portable::Multiply2Add(word *C, const word *A, const word *B) @@ -301,77 +522,77 @@ word Portable::Multiply2Add(word *C, const word *A, const word *B) unsigned int ai = A[1] < A[0]; unsigned int bi = B[0] < B[1]; unsigned int di = ai & bi; - dword d = (dword)D[di]*D[di+2]; + DWord d = DWord::Multiply(D[di], D[di+2]); D[1] = D[3] = 0; unsigned int si = ai + !bi; word s = D[si]; - dword A0B0 = (dword)A[0]*B[0]; - dword t = A0B0 + C[0]; - C[0] = LOW_WORD(t); + DWord A0B0 = DWord::Multiply(A[0], B[0]); + DWord t = A0B0 + C[0]; + C[0] = t.GetLowHalf(); - dword A1B1 = (dword)A[1]*B[1]; - t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1]; - C[1] = LOW_WORD(t); + DWord A1B1 = DWord::Multiply(A[1], B[1]); + t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1]; + C[1] = t.GetLowHalf(); - t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2]; - C[2] = LOW_WORD(t); + t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2]; + C[2] = t.GetLowHalf(); - t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3]; - C[3] = LOW_WORD(t); - return HIGH_WORD(t); + t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3]; + C[3] = t.GetLowHalf(); + return t.GetHighHalf(); } #define MulAcc(x, y) \ - p = (dword)A[x] * B[y] + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[x], B[y], c); \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); #define SaveMulAcc(s, x, y) \ R[s] = c; \ - p = (dword)A[x] * B[y] + d; \ - c = LOW_WORD(p); \ - p = (dword)e + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e = HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[x], B[y], d); \ + c = p.GetLowHalf(); \ + p = (DWord) e + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e = p.GetHighHalf(); #define SquAcc(x, y) \ - q = (dword)A[x] * A[y]; \ + q = DWord::Multiply(A[x], A[y]); \ p = q + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); \ p = q + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); #define SaveSquAcc(s, x, y) \ R[s] = c; \ - q = (dword)A[x] * A[y]; \ + q = DWord::Multiply(A[x], A[y]); \ p = q + d; \ - c = LOW_WORD(p); \ - p = (dword)e + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e = HIGH_WORD(p); \ + c = p.GetLowHalf(); \ + p = (DWord) e + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e = p.GetHighHalf(); \ p = q + c; \ - c = LOW_WORD(p); \ - p = (dword)d + HIGH_WORD(p); \ - d = LOW_WORD(p); \ - e += HIGH_WORD(p); + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); void Portable::Multiply4(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -394,38 +615,38 @@ void Portable::Multiply4(word *R, const word *A, const word *B) MulAcc(3, 2); R[5] = c; - p = (dword)A[3] * B[3] + d; - R[6] = LOW_WORD(p); - R[7] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[3], B[3], d); + R[6] = p.GetLowHalf(); + R[7] = e + p.GetHighHalf(); } void Portable::Square2(word *R, const word *A) { - dword p, q; + DWord p, q; word c, d, e; - p = (dword)A[0] * A[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], A[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; SquAcc(0, 1); R[1] = c; - p = (dword)A[1] * A[1] + d; - R[2] = LOW_WORD(p); - R[3] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[1], A[1], d); + R[2] = p.GetLowHalf(); + R[3] = e + p.GetHighHalf(); } void Portable::Square4(word *R, const word *A) { const word *B = A; - dword p, q; + DWord p, q; word c, d, e; - p = (dword)A[0] * A[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], A[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; SquAcc(0, 1); @@ -442,19 +663,19 @@ void Portable::Square4(word *R, const word *A) SaveSquAcc(4, 2, 3); R[5] = c; - p = (dword)A[3] * A[3] + d; - R[6] = LOW_WORD(p); - R[7] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[3], A[3], d); + R[6] = p.GetLowHalf(); + R[7] = e + p.GetHighHalf(); } void Portable::Multiply8(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -533,19 +754,19 @@ void Portable::Multiply8(word *R, const word *A, const word *B) MulAcc(7, 6); R[13] = c; - p = (dword)A[7] * B[7] + d; - R[14] = LOW_WORD(p); - R[15] = e + HIGH_WORD(p); + p = DWord::MultiplyAndAdd(A[7], B[7], d); + R[14] = p.GetLowHalf(); + R[15] = e + p.GetHighHalf(); } void Portable::Multiply4Bottom(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -561,12 +782,12 @@ void Portable::Multiply4Bottom(word *R, const word *A, const word *B) void Portable::Multiply8Bottom(word *R, const word *A, const word *B) { - dword p; + DWord p; word c, d, e; - p = (dword)A[0] * B[0]; - R[0] = LOW_WORD(p); - c = HIGH_WORD(p); + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); d = e = 0; MulAcc(0, 1); @@ -620,6 +841,7 @@ class PentiumOptimized : public Portable public: static word __fastcall Add(word *C, const word *A, const word *B, unsigned int N); static word __fastcall Subtract(word *C, const word *A, const word *B, unsigned int N); +// TODO test this with .NET #if _MSC_VER < 1300 static inline void Square4(word *R, const word *A) { // VC60 workaround: MSVC 6.0 has an optimization bug that makes @@ -628,6 +850,7 @@ public: // bug is fixed. Multiply4(R, A, A); } +//#endif }; typedef PentiumOptimized LowLevel; @@ -1703,88 +1926,7 @@ void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y) ); } -#elif defined(__GNUC__) && defined(CRYPTOPP_64BIT_CPU) - -#ifdef __alpha__ -#define MUL64x64(a, b, c, d) c = a*b; __asm__("umulh %1,%2,%0" : "=r" (d) : "r" (a), "r" (b)) -#elif defined(__ia64__) -#define MUL64x64(a, b, c, d) c = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (d) : "f" (a), "f" (b)) -#elif defined(_ARCH_PPC64) -#define MUL64x64(a, b, c, d) c = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (d) : "r" (a), "r" (b) : "cc") -#elif defined(__x86_64__) -#define MUL64x64(a, b, c, d) __asm__("mulq %3" : "=d" (d), "=a" (c) : "a" (a), "rm" (b) : "cc") -#elif defined(__mips64) -#define MUL64x64(a, b, c, d) __asm__("dmultu %2,%3" : "=h" (d), "=l" (c) : "r" (a), "r" (b)) -#elif defined(__sparc_v9__) || defined(__sparcv9) || defined(__sparc_v8__) || defined(__sparcv8) -#define MUL64x64(a, b, c, d) __asm__("umul %2,%3,%1;rd %%y,%0" : "=r" (d), "=r" (c) : "r" (a), "r" (b)) -#endif - -class OptimizedFor64BitCPU : public Portable -{ -public: - static inline void Multiply2(word *C, const word *A, const word *B); - static inline word Multiply2Add(word *C, const word *A, const word *B); - static inline void Multiply4(word *C, const word *A, const word *B); - static inline unsigned int MultiplyRecursionLimit() {return 4;} - - static inline void Multiply4Bottom(word *C, const word *A, const word *B); - static inline unsigned int MultiplyBottomRecursionLimit() {return 4;} - - static inline void Square4(word *R, const word *A) - { - Multiply4(R, A, A); - } -}; - -typedef OptimizedFor64BitCPU LowLevel; - -inline void OptimizedFor64BitCPU::Multiply2(word *C, const word *A, const word *B) -{ - register dword c, d, a = *(const dword *)A, b = *(const dword *)B; - MUL64x64(a, b, c, d); - ((dword *)C)[0] = c; - ((dword *)C)[1] = d; -} - -inline word OptimizedFor64BitCPU::Multiply2Add(word *C, const word *A, const word *B) -{ - register dword c, d, e, a = *(const dword *)A, b = *(const dword *)B; - c = ((dword *)C)[0]; - MUL64x64(a, b, d, e); - d += c; - ((dword *)C)[0] = d; - d = (d < c); - c = ((dword *)C)[1] + d; - d = (c < d); - c += e; - ((dword *)C)[1] = c; - d |= (c < e); - return d; -} - -inline void OptimizedFor64BitCPU::Multiply4(word *R, const word *A, const word *B) -{ - Multiply2(R, A, B); - Multiply2(R+4, A+2, B+2); - word carry = Multiply2Add(R+2, A+0, B+2); - carry += Multiply2Add(R+2, A+2, B+0); - Increment(R+6, 2, carry); -} - -static inline void Multiply2BottomAdd(word *C, const word *A, const word *B) -{ - register dword a = *(const dword *)A, b = *(const dword *)B; - ((dword *)C)[0] = a*b + ((dword *)C)[0]; -} - -inline void OptimizedFor64BitCPU::Multiply4Bottom(word *R, const word *A, const word *B) -{ - Multiply2(R, A, B); - Multiply2BottomAdd(R+2, A+0, B+2); - Multiply2BottomAdd(R+2, A+2, B+0); -} - -#else // no processor specific code available +#else // no processor specific code at this layer typedef Portable LowLevel; @@ -1970,13 +2112,12 @@ void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const if (N==4) { P::Multiply4(T, A, B); - ((dword *)R)[0] = ((dword *)T)[2]; - ((dword *)R)[1] = ((dword *)T)[3]; + memcpy(R, T+4, 4*WORD_SIZE); } else if (N==2) { P::Multiply2(T, A, B); - ((dword *)R)[0] = ((dword *)T)[1]; + memcpy(R, T+2, 2*WORD_SIZE); } else { @@ -2088,6 +2229,18 @@ inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const wo RecursiveMultiplyTop<LowLevel>(R, T, L, A, B, N); } +static word LinearMultiply(word *C, const word *A, word B, unsigned int N) +{ + word carry=0; + for(unsigned i=0; i<N; i++) + { + DWord p = DWord::MultiplyAndAdd(A[i], B, carry); + C[i] = p.GetLowHalf(); + carry = p.GetHighHalf(); + } + return carry; +} + // R[NA+NB] - result = A*B // T[NA+NB] - temporary work space // A[NA] ---- multiplier @@ -2153,7 +2306,14 @@ void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N) { if (N==2) - AtomicInverseModPower2(R, A[0], A[1]); + { + T[0] = AtomicInverseModPower2(A[0]); + T[1] = 0; + LowLevel::Multiply2Bottom(T+2, T, A); + TwosComplement(T+2, 2); + Increment(T+2, 2, 2); + LowLevel::Multiply2Bottom(R, T, T+2); + } else { const unsigned int N2 = N/2; @@ -2255,37 +2415,36 @@ void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const #undef R2 #undef R3 +/* // do a 3 word by 2 word divide, returns quotient and leaves remainder in A static word SubatomicDivide(word *A, word B0, word B1) { // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); - dword p, u; - word Q; - // estimate the quotient: do a 2 word by 1 word divide + word Q; if (B1+1 == 0) Q = A[2]; else - Q = word(MAKE_DWORD(A[1], A[2]) / (B1+1)); + Q = DWord(A[1], A[2]).DividedBy(B1+1); // now subtract Q*B from A - p = (dword) B0*Q; - u = (dword) A[0] - LOW_WORD(p); - A[0] = LOW_WORD(u); - u = (dword) A[1] - HIGH_WORD(p) - (word)(0-HIGH_WORD(u)) - (dword)B1*Q; - A[1] = LOW_WORD(u); - A[2] += HIGH_WORD(u); + DWord p = DWord::Multiply(B0, Q); + DWord u = (DWord) A[0] - p.GetLowHalf(); + A[0] = u.GetLowHalf(); + u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); // Q <= actual quotient, so fix it while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) { - u = (dword) A[0] - B0; - A[0] = LOW_WORD(u); - u = (dword) A[1] - B1 - (word)(0-HIGH_WORD(u)); - A[1] = LOW_WORD(u); - A[2] += HIGH_WORD(u); + u = (DWord) A[0] - B0; + A[0] = u.GetLowHalf(); + u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow(); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); Q++; assert(Q); // shouldn't overflow } @@ -2318,6 +2477,27 @@ static inline void AtomicDivide(word *Q, const word *A, const word *B) #endif } } +*/ + +static inline void AtomicDivide(word *Q, const word *A, const word *B) +{ + word T[4]; + DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); + Q[0] = q.GetLowHalf(); + Q[1] = q.GetHighHalf(); + +#ifndef NDEBUG + if (B[0] || B[1]) + { + // multiply quotient and divisor and add remainder, make sure it equals dividend + assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); + word P[4]; + Portable::Multiply2(P, Q, B); + Add(P, P, T, 4); + assert(memcmp(P, A, 4*WORD_SIZE)==0); + } +#endif +} // for use by Divide(), corrects the underestimated quotient {Q1,Q0} static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N) @@ -2570,6 +2750,13 @@ Integer::Integer(const Integer& t) CopyWords(reg, t.reg, reg.size()); } +Integer::Integer(Sign s, lword value) + : reg(2), sign(s) +{ + reg[0] = word(value); + reg[1] = word(SafeRightShift<WORD_BITS>(value)); +} + Integer::Integer(signed long value) : reg(2) { @@ -2581,7 +2768,7 @@ Integer::Integer(signed long value) value = -value; } reg[0] = word(value); - reg[1] = word(SafeRightShift<WORD_BITS, unsigned long>(value)); + reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value)); } Integer::Integer(Sign s, word high, word low) @@ -2877,13 +3064,13 @@ void Integer::Decode(BufferedTransformation &bt, unsigned int inputLen, Signedne for (unsigned int i=inputLen; i > 0; i--) { bt.Get(b); - reg[(i-1)/WORD_SIZE] |= b << ((i-1)%WORD_SIZE)*8; + reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; } if (sign == NEGATIVE) { for (unsigned i=inputLen; i<reg.size()*WORD_SIZE; i++) - reg[i/WORD_SIZE] |= 0xff << (i%WORD_SIZE)*8; + reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8; TwosComplement(reg, reg.size()); } } @@ -3598,8 +3785,8 @@ void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd remainder = 0; while (i--) { - quotient.reg[i] = word(MAKE_DWORD(dividend.reg[i], remainder) / divisor); - remainder = word(MAKE_DWORD(dividend.reg[i], remainder) % divisor); + quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor; + remainder = DWord(dividend.reg[i], remainder) % divisor; } if (dividend.NotNegative()) @@ -3640,16 +3827,16 @@ word Integer::Modulo(word divisor) const if (divisor <= 5) { - dword sum=0; + DWord sum(0, 0); while (i--) sum += reg[i]; - remainder = word(sum%divisor); + remainder = sum % divisor; } else { remainder = 0; while (i--) - remainder = word(MAKE_DWORD(reg[i], remainder) % divisor); + remainder = DWord(reg[i], remainder) % divisor; } } |