00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017 #include "cpu.h"
00018
00019 #include <iostream>
00020
00021 #if _MSC_VER >= 1400
00022 #include <intrin.h>
00023 #endif
00024
00025 #ifdef __DECCXX
00026 #include <c_asm.h>
00027 #endif
00028
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034
00035 NAMESPACE_BEGIN(CryptoPP)
00036
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039 if (valueType != typeid(Integer))
00040 return false;
00041 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042 return true;
00043 }
00044
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047 while (N--)
00048 if (A[N] > B[N])
00049 return 1;
00050 else if (A[N] < B[N])
00051 return -1;
00052
00053 return 0;
00054 }
00055
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058 assert(N);
00059 word t = A[0];
00060 A[0] = t+B;
00061 if (A[0] >= t)
00062 return 0;
00063 for (unsigned i=1; i<N; i++)
00064 if (++A[i])
00065 return 0;
00066 return 1;
00067 }
00068
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071 assert(N);
00072 word t = A[0];
00073 A[0] = t-B;
00074 if (A[0] <= t)
00075 return 0;
00076 for (unsigned i=1; i<N; i++)
00077 if (A[i]--)
00078 return 0;
00079 return 1;
00080 }
00081
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084 Decrement(A, N);
00085 for (unsigned i=0; i<N; i++)
00086 A[i] = ~A[i];
00087 }
00088
00089 static word AtomicInverseModPower2(word A)
00090 {
00091 assert(A%2==1);
00092
00093 word R=A%8;
00094
00095 for (unsigned i=3; i<WORD_BITS; i*=2)
00096 R = R*(2-R*A);
00097
00098 assert(R*A==1);
00099 return R;
00100 }
00101
00102
00103
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
00105 #define Declare2Words(x) word x##0, x##1;
00106 #define AssignWord(a, b) a##0 = b; a##1 = 0;
00107 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108 #define LowWord(a) a##0
00109 #define HighWord(a) a##1
00110 #ifdef _MSC_VER
00111 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
00112 #ifndef __INTEL_COMPILER
00113 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00114 #endif
00115 #elif defined(__DECCXX)
00116 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
00117 #elif defined(__x86_64__)
00118 #ifdef __SUNPRO_CC
00119
00120 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
00121 #else
00122 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00123 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00124 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00125 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00126 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00127 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00128 #endif
00129 #endif
00130 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
00131 #ifndef Double3Words
00132 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00133 #endif
00134 #ifndef Acc2WordsBy2
00135 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00136 #endif
00137 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00138 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00139 #define GetCarry(u) u##1
00140 #define GetBorrow(u) u##1
00141 #else
00142 #define Declare2Words(x) dword x;
00143 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00144 #define MultiplyWords(p, a, b) p = __emulu(a, b);
00145 #else
00146 #define MultiplyWords(p, a, b) p = (dword)a*b;
00147 #endif
00148 #define AssignWord(a, b) a = b;
00149 #define Add2WordsBy1(a, b, c) a = b + c;
00150 #define Acc2WordsBy2(a, b) a += b;
00151 #define LowWord(a) word(a)
00152 #define HighWord(a) word(a>>WORD_BITS)
00153 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00154 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
00155 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
00156 #define GetCarry(u) HighWord(u)
00157 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
00158 #endif
00159 #ifndef MulAcc
00160 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00161 #endif
00162 #ifndef Acc2WordsBy1
00163 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
00164 #endif
00165 #ifndef Acc3WordsBy2
00166 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00167 #endif
00168
00169 class DWord
00170 {
00171 public:
00172 DWord() {}
00173
00174 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00175 explicit DWord(word low)
00176 {
00177 m_whole = low;
00178 }
00179 #else
00180 explicit DWord(word low)
00181 {
00182 m_halfs.low = low;
00183 m_halfs.high = 0;
00184 }
00185 #endif
00186
00187 DWord(word low, word high)
00188 {
00189 m_halfs.low = low;
00190 m_halfs.high = high;
00191 }
00192
00193 static DWord Multiply(word a, word b)
00194 {
00195 DWord r;
00196 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00197 r.m_whole = (dword)a * b;
00198 #elif defined(MultiplyWordsLoHi)
00199 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
00200 #endif
00201 return r;
00202 }
00203
00204 static DWord MultiplyAndAdd(word a, word b, word c)
00205 {
00206 DWord r = Multiply(a, b);
00207 return r += c;
00208 }
00209
00210 DWord & operator+=(word a)
00211 {
00212 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00213 m_whole = m_whole + a;
00214 #else
00215 m_halfs.low += a;
00216 m_halfs.high += (m_halfs.low < a);
00217 #endif
00218 return *this;
00219 }
00220
00221 DWord operator+(word a)
00222 {
00223 DWord r;
00224 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00225 r.m_whole = m_whole + a;
00226 #else
00227 r.m_halfs.low = m_halfs.low + a;
00228 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00229 #endif
00230 return r;
00231 }
00232
00233 DWord operator-(DWord a)
00234 {
00235 DWord r;
00236 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00237 r.m_whole = m_whole - a.m_whole;
00238 #else
00239 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00240 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00241 #endif
00242 return r;
00243 }
00244
00245 DWord operator-(word a)
00246 {
00247 DWord r;
00248 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00249 r.m_whole = m_whole - a;
00250 #else
00251 r.m_halfs.low = m_halfs.low - a;
00252 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00253 #endif
00254 return r;
00255 }
00256
00257
00258 word operator/(word divisor);
00259
00260 word operator%(word a);
00261
00262 bool operator!() const
00263 {
00264 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00265 return !m_whole;
00266 #else
00267 return !m_halfs.high && !m_halfs.low;
00268 #endif
00269 }
00270
00271 word GetLowHalf() const {return m_halfs.low;}
00272 word GetHighHalf() const {return m_halfs.high;}
00273 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00274
00275 private:
00276 union
00277 {
00278 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279 dword m_whole;
00280 #endif
00281 struct
00282 {
00283 #ifdef IS_LITTLE_ENDIAN
00284 word low;
00285 word high;
00286 #else
00287 word high;
00288 word low;
00289 #endif
00290 } m_halfs;
00291 };
00292 };
00293
00294 class Word
00295 {
00296 public:
00297 Word() {}
00298
00299 Word(word value)
00300 {
00301 m_whole = value;
00302 }
00303
00304 Word(hword low, hword high)
00305 {
00306 m_whole = low | (word(high) << (WORD_BITS/2));
00307 }
00308
00309 static Word Multiply(hword a, hword b)
00310 {
00311 Word r;
00312 r.m_whole = (word)a * b;
00313 return r;
00314 }
00315
00316 Word operator-(Word a)
00317 {
00318 Word r;
00319 r.m_whole = m_whole - a.m_whole;
00320 return r;
00321 }
00322
00323 Word operator-(hword a)
00324 {
00325 Word r;
00326 r.m_whole = m_whole - a;
00327 return r;
00328 }
00329
00330
00331 hword operator/(hword divisor)
00332 {
00333 return hword(m_whole / divisor);
00334 }
00335
00336 bool operator!() const
00337 {
00338 return !m_whole;
00339 }
00340
00341 word GetWhole() const {return m_whole;}
00342 hword GetLowHalf() const {return hword(m_whole);}
00343 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00344 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00345
00346 private:
00347 word m_whole;
00348 };
00349
00350
00351 template <class S, class D>
00352 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00353 {
00354
00355 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00356
00357
00358 S Q;
00359 if (S(B1+1) == 0)
00360 Q = A[2];
00361 else if (B1 > 0)
00362 Q = D(A[1], A[2]) / S(B1+1);
00363 else
00364 Q = D(A[0], A[1]) / B0;
00365
00366
00367 D p = D::Multiply(B0, Q);
00368 D u = (D) A[0] - p.GetLowHalf();
00369 A[0] = u.GetLowHalf();
00370 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00371 A[1] = u.GetLowHalf();
00372 A[2] += u.GetHighHalf();
00373
00374
00375 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00376 {
00377 u = (D) A[0] - B0;
00378 A[0] = u.GetLowHalf();
00379 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00380 A[1] = u.GetLowHalf();
00381 A[2] += u.GetHighHalf();
00382 Q++;
00383 assert(Q);
00384 }
00385
00386 return Q;
00387 }
00388
00389
00390 template <class S, class D>
00391 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00392 {
00393 if (!B)
00394 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00395 else
00396 {
00397 S Q[2];
00398 T[0] = Al.GetLowHalf();
00399 T[1] = Al.GetHighHalf();
00400 T[2] = Ah.GetLowHalf();
00401 T[3] = Ah.GetHighHalf();
00402 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00403 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00404 return D(Q[0], Q[1]);
00405 }
00406 }
00407
00408
00409 inline word DWord::operator/(word a)
00410 {
00411 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00412 return word(m_whole / a);
00413 #else
00414 hword r[4];
00415 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00416 #endif
00417 }
00418
00419 inline word DWord::operator%(word a)
00420 {
00421 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00422 return word(m_whole % a);
00423 #else
00424 if (a < (word(1) << (WORD_BITS/2)))
00425 {
00426 hword h = hword(a);
00427 word r = m_halfs.high % h;
00428 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00429 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00430 }
00431 else
00432 {
00433 hword r[4];
00434 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00435 return Word(r[0], r[1]).GetWhole();
00436 }
00437 #endif
00438 }
00439
00440
00441
00442
00443 #if defined(__GNUC__)
00444 #define AddPrologue \
00445 int result; \
00446 __asm__ __volatile__ \
00447 ( \
00448 ".intel_syntax noprefix;"
00449 #define AddEpilogue \
00450 ".att_syntax prefix;" \
00451 : "=a" (result)\
00452 : "d" (C), "a" (A), "D" (B), "c" (N) \
00453 : "%esi", "memory", "cc" \
00454 );\
00455 return result;
00456 #define MulPrologue \
00457 __asm__ __volatile__ \
00458 ( \
00459 ".intel_syntax noprefix;" \
00460 AS1( push ebx) \
00461 AS2( mov ebx, edx)
00462 #define MulEpilogue \
00463 AS1( pop ebx) \
00464 ".att_syntax prefix;" \
00465 : \
00466 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00467 : "%esi", "memory", "cc" \
00468 );
00469 #define SquPrologue MulPrologue
00470 #define SquEpilogue \
00471 AS1( pop ebx) \
00472 ".att_syntax prefix;" \
00473 : \
00474 : "d" (s_maskLow16), "c" (C), "a" (A) \
00475 : "%esi", "%edi", "memory", "cc" \
00476 );
00477 #define TopPrologue MulPrologue
00478 #define TopEpilogue \
00479 AS1( pop ebx) \
00480 ".att_syntax prefix;" \
00481 : \
00482 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00483 : "memory", "cc" \
00484 );
00485 #else
00486 #define AddPrologue \
00487 __asm push edi \
00488 __asm push esi \
00489 __asm mov eax, [esp+12] \
00490 __asm mov edi, [esp+16]
00491 #define AddEpilogue \
00492 __asm pop esi \
00493 __asm pop edi \
00494 __asm ret 8
00495 #if _MSC_VER < 1300
00496 #define SaveEBX __asm push ebx
00497 #define RestoreEBX __asm pop ebx
00498 #else
00499 #define SaveEBX
00500 #define RestoreEBX
00501 #endif
00502 #define SquPrologue \
00503 AS2( mov eax, A) \
00504 AS2( mov ecx, C) \
00505 SaveEBX \
00506 AS2( lea ebx, s_maskLow16)
00507 #define MulPrologue \
00508 AS2( mov eax, A) \
00509 AS2( mov edi, B) \
00510 AS2( mov ecx, C) \
00511 SaveEBX \
00512 AS2( lea ebx, s_maskLow16)
00513 #define TopPrologue \
00514 AS2( mov eax, A) \
00515 AS2( mov edi, B) \
00516 AS2( mov ecx, C) \
00517 AS2( mov esi, L) \
00518 SaveEBX \
00519 AS2( lea ebx, s_maskLow16)
00520 #define SquEpilogue RestoreEBX
00521 #define MulEpilogue RestoreEBX
00522 #define TopEpilogue RestoreEBX
00523 #endif
00524
00525 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00526 extern "C" {
00527 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00528 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00529 }
00530 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
00531 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00532 {
00533 word result;
00534 __asm__ __volatile__
00535 (
00536 ".intel_syntax;"
00537 AS1( neg %1)
00538 ASJ( jz, 1, f)
00539 AS2( mov %0,[%3+8*%1])
00540 AS2( add %0,[%4+8*%1])
00541 AS2( mov [%2+8*%1],%0)
00542 ASL(0)
00543 AS2( mov %0,[%3+8*%1+8])
00544 AS2( adc %0,[%4+8*%1+8])
00545 AS2( mov [%2+8*%1+8],%0)
00546 AS2( lea %1,[%1+2])
00547 ASJ( jrcxz, 1, f)
00548 AS2( mov %0,[%3+8*%1])
00549 AS2( adc %0,[%4+8*%1])
00550 AS2( mov [%2+8*%1],%0)
00551 ASJ( jmp, 0, b)
00552 ASL(1)
00553 AS2( mov %0, 0)
00554 AS2( adc %0, %0)
00555 ".att_syntax;"
00556 : "=&r" (result), "+c" (N)
00557 : "r" (C+N), "r" (A+N), "r" (B+N)
00558 : "memory", "cc"
00559 );
00560 return (int)result;
00561 }
00562
00563 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00564 {
00565 word result;
00566 __asm__ __volatile__
00567 (
00568 ".intel_syntax;"
00569 AS1( neg %1)
00570 ASJ( jz, 1, f)
00571 AS2( mov %0,[%3+8*%1])
00572 AS2( sub %0,[%4+8*%1])
00573 AS2( mov [%2+8*%1],%0)
00574 ASL(0)
00575 AS2( mov %0,[%3+8*%1+8])
00576 AS2( sbb %0,[%4+8*%1+8])
00577 AS2( mov [%2+8*%1+8],%0)
00578 AS2( lea %1,[%1+2])
00579 ASJ( jrcxz, 1, f)
00580 AS2( mov %0,[%3+8*%1])
00581 AS2( sbb %0,[%4+8*%1])
00582 AS2( mov [%2+8*%1],%0)
00583 ASJ( jmp, 0, b)
00584 ASL(1)
00585 AS2( mov %0, 0)
00586 AS2( adc %0, %0)
00587 ".att_syntax;"
00588 : "=&r" (result), "+c" (N)
00589 : "r" (C+N), "r" (A+N), "r" (B+N)
00590 : "memory", "cc"
00591 );
00592 return (int)result;
00593 }
00594 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00595 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00596 {
00597 AddPrologue
00598
00599
00600 AS2( lea eax, [eax+4*ecx])
00601 AS2( lea edi, [edi+4*ecx])
00602 AS2( lea edx, [edx+4*ecx])
00603
00604 AS1( neg ecx)
00605 AS2( test ecx, 2)
00606 ASJ( jz, 0, f)
00607 AS2( sub ecx, 2)
00608 ASJ( jmp, 1, f)
00609
00610 ASL(0)
00611 ASJ( jecxz, 2, f)
00612 AS2( mov esi,[eax+4*ecx])
00613 AS2( adc esi,[edi+4*ecx])
00614 AS2( mov [edx+4*ecx],esi)
00615 AS2( mov esi,[eax+4*ecx+4])
00616 AS2( adc esi,[edi+4*ecx+4])
00617 AS2( mov [edx+4*ecx+4],esi)
00618 ASL(1)
00619 AS2( mov esi,[eax+4*ecx+8])
00620 AS2( adc esi,[edi+4*ecx+8])
00621 AS2( mov [edx+4*ecx+8],esi)
00622 AS2( mov esi,[eax+4*ecx+12])
00623 AS2( adc esi,[edi+4*ecx+12])
00624 AS2( mov [edx+4*ecx+12],esi)
00625
00626 AS2( lea ecx,[ecx+4])
00627 ASJ( jmp, 0, b)
00628
00629 ASL(2)
00630 AS2( mov eax, 0)
00631 AS1( setc al)
00632
00633 AddEpilogue
00634 }
00635
00636 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00637 {
00638 AddPrologue
00639
00640
00641 AS2( lea eax, [eax+4*ecx])
00642 AS2( lea edi, [edi+4*ecx])
00643 AS2( lea edx, [edx+4*ecx])
00644
00645 AS1( neg ecx)
00646 AS2( test ecx, 2)
00647 ASJ( jz, 0, f)
00648 AS2( sub ecx, 2)
00649 ASJ( jmp, 1, f)
00650
00651 ASL(0)
00652 ASJ( jecxz, 2, f)
00653 AS2( mov esi,[eax+4*ecx])
00654 AS2( sbb esi,[edi+4*ecx])
00655 AS2( mov [edx+4*ecx],esi)
00656 AS2( mov esi,[eax+4*ecx+4])
00657 AS2( sbb esi,[edi+4*ecx+4])
00658 AS2( mov [edx+4*ecx+4],esi)
00659 ASL(1)
00660 AS2( mov esi,[eax+4*ecx+8])
00661 AS2( sbb esi,[edi+4*ecx+8])
00662 AS2( mov [edx+4*ecx+8],esi)
00663 AS2( mov esi,[eax+4*ecx+12])
00664 AS2( sbb esi,[edi+4*ecx+12])
00665 AS2( mov [edx+4*ecx+12],esi)
00666
00667 AS2( lea ecx,[ecx+4])
00668 ASJ( jmp, 0, b)
00669
00670 ASL(2)
00671 AS2( mov eax, 0)
00672 AS1( setc al)
00673
00674 AddEpilogue
00675 }
00676
00677 #if CRYPTOPP_INTEGER_SSE2
00678 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00679 {
00680 AddPrologue
00681
00682
00683 AS2( lea eax, [eax+4*ecx])
00684 AS2( lea edi, [edi+4*ecx])
00685 AS2( lea edx, [edx+4*ecx])
00686
00687 AS1( neg ecx)
00688 AS2( pxor mm2, mm2)
00689 ASJ( jz, 2, f)
00690 AS2( test ecx, 2)
00691 ASJ( jz, 0, f)
00692 AS2( sub ecx, 2)
00693 ASJ( jmp, 1, f)
00694
00695 ASL(0)
00696 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00697 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00698 AS2( paddq mm0, mm1)
00699 AS2( paddq mm2, mm0)
00700 AS2( movd DWORD PTR [edx+4*ecx], mm2)
00701 AS2( psrlq mm2, 32)
00702
00703 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
00704 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00705 AS2( paddq mm0, mm1)
00706 AS2( paddq mm2, mm0)
00707 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00708 AS2( psrlq mm2, 32)
00709
00710 ASL(1)
00711 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00712 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00713 AS2( paddq mm0, mm1)
00714 AS2( paddq mm2, mm0)
00715 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
00716 AS2( psrlq mm2, 32)
00717
00718 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
00719 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00720 AS2( paddq mm0, mm1)
00721 AS2( paddq mm2, mm0)
00722 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00723 AS2( psrlq mm2, 32)
00724
00725 AS2( add ecx, 4)
00726 ASJ( jnz, 0, b)
00727
00728 ASL(2)
00729 AS2( movd eax, mm2)
00730 AS1( emms)
00731
00732 AddEpilogue
00733 }
00734 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00735 {
00736 AddPrologue
00737
00738
00739 AS2( lea eax, [eax+4*ecx])
00740 AS2( lea edi, [edi+4*ecx])
00741 AS2( lea edx, [edx+4*ecx])
00742
00743 AS1( neg ecx)
00744 AS2( pxor mm2, mm2)
00745 ASJ( jz, 2, f)
00746 AS2( test ecx, 2)
00747 ASJ( jz, 0, f)
00748 AS2( sub ecx, 2)
00749 ASJ( jmp, 1, f)
00750
00751 ASL(0)
00752 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00753 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00754 AS2( psubq mm0, mm1)
00755 AS2( psubq mm0, mm2)
00756 AS2( movd DWORD PTR [edx+4*ecx], mm0)
00757 AS2( psrlq mm0, 63)
00758
00759 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
00760 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00761 AS2( psubq mm2, mm1)
00762 AS2( psubq mm2, mm0)
00763 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00764 AS2( psrlq mm2, 63)
00765
00766 ASL(1)
00767 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00768 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00769 AS2( psubq mm0, mm1)
00770 AS2( psubq mm0, mm2)
00771 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
00772 AS2( psrlq mm0, 63)
00773
00774 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
00775 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00776 AS2( psubq mm2, mm1)
00777 AS2( psubq mm2, mm0)
00778 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00779 AS2( psrlq mm2, 63)
00780
00781 AS2( add ecx, 4)
00782 ASJ( jnz, 0, b)
00783
00784 ASL(2)
00785 AS2( movd eax, mm2)
00786 AS1( emms)
00787
00788 AddEpilogue
00789 }
00790 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00791 #else
00792 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00793 {
00794 assert (N%2 == 0);
00795
00796 Declare2Words(u);
00797 AssignWord(u, 0);
00798 for (size_t i=0; i<N; i+=2)
00799 {
00800 AddWithCarry(u, A[i], B[i]);
00801 C[i] = LowWord(u);
00802 AddWithCarry(u, A[i+1], B[i+1]);
00803 C[i+1] = LowWord(u);
00804 }
00805 return int(GetCarry(u));
00806 }
00807
00808 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00809 {
00810 assert (N%2 == 0);
00811
00812 Declare2Words(u);
00813 AssignWord(u, 0);
00814 for (size_t i=0; i<N; i+=2)
00815 {
00816 SubtractWithBorrow(u, A[i], B[i]);
00817 C[i] = LowWord(u);
00818 SubtractWithBorrow(u, A[i+1], B[i+1]);
00819 C[i+1] = LowWord(u);
00820 }
00821 return int(GetBorrow(u));
00822 }
00823 #endif
00824
00825 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00826 {
00827 word carry=0;
00828 for(unsigned i=0; i<N; i++)
00829 {
00830 Declare2Words(p);
00831 MultiplyWords(p, A[i], B);
00832 Acc2WordsBy1(p, carry);
00833 C[i] = LowWord(p);
00834 carry = HighWord(p);
00835 }
00836 return carry;
00837 }
00838
00839 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00840
00841 #define Mul_2 \
00842 Mul_Begin(2) \
00843 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00844 Mul_End(1, 1)
00845
00846 #define Mul_4 \
00847 Mul_Begin(4) \
00848 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00849 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00850 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00851 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
00852 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00853 Mul_End(5, 3)
00854
00855 #define Mul_8 \
00856 Mul_Begin(8) \
00857 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00858 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00859 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00860 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00861 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00862 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00863 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00864 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00865 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00866 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00867 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00868 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00869 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00870 Mul_End(13, 7)
00871
00872 #define Mul_16 \
00873 Mul_Begin(16) \
00874 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00875 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00876 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00877 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00878 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00879 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00880 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00881 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00882 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00883 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00884 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00885 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00886 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00887 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00888 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00889 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00890 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00891 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00892 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00893 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00894 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00895 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00896 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00897 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00898 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00899 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00900 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00901 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00902 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00903 Mul_End(29, 15)
00904
00905 #define Squ_2 \
00906 Squ_Begin(2) \
00907 Squ_End(2)
00908
00909 #define Squ_4 \
00910 Squ_Begin(4) \
00911 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00912 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00913 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00914 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00915 Squ_End(4)
00916
00917 #define Squ_8 \
00918 Squ_Begin(8) \
00919 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00920 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00921 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00922 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00923 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00924 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00925 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00926 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00927 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00928 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00929 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00930 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00931 Squ_End(8)
00932
00933 #define Squ_16 \
00934 Squ_Begin(16) \
00935 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00936 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00937 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00938 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00939 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00940 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00941 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00942 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00943 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00944 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00945 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00946 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00947 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00948 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00949 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00950 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00951 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00952 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00953 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00954 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00955 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00956 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00957 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00958 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00959 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00960 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00961 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00962 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00963 Squ_End(16)
00964
00965 #define Bot_2 \
00966 Mul_Begin(2) \
00967 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00968 Bot_End(2)
00969
00970 #define Bot_4 \
00971 Mul_Begin(4) \
00972 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00973 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
00974 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
00975 Bot_End(4)
00976
00977 #define Bot_8 \
00978 Mul_Begin(8) \
00979 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00980 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00981 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00982 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00983 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00984 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00985 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00986 Bot_End(8)
00987
00988 #define Bot_16 \
00989 Mul_Begin(16) \
00990 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00991 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00992 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00993 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00994 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00995 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00996 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00997 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00998 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00999 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
01000 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
01001 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
01002 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
01003 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
01004 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
01005 Bot_End(16)
01006
01007 #endif
01008
01009 #if 0
01010 #define Mul_Begin(n) \
01011 Declare2Words(p) \
01012 Declare2Words(c) \
01013 Declare2Words(d) \
01014 MultiplyWords(p, A[0], B[0]) \
01015 AssignWord(c, LowWord(p)) \
01016 AssignWord(d, HighWord(p))
01017
01018 #define Mul_Acc(i, j) \
01019 MultiplyWords(p, A[i], B[j]) \
01020 Acc2WordsBy1(c, LowWord(p)) \
01021 Acc2WordsBy1(d, HighWord(p))
01022
01023 #define Mul_SaveAcc(k, i, j) \
01024 R[k] = LowWord(c); \
01025 Add2WordsBy1(c, d, HighWord(c)) \
01026 MultiplyWords(p, A[i], B[j]) \
01027 AssignWord(d, HighWord(p)) \
01028 Acc2WordsBy1(c, LowWord(p))
01029
01030 #define Mul_End(n) \
01031 R[2*n-3] = LowWord(c); \
01032 Acc2WordsBy1(d, HighWord(c)) \
01033 MultiplyWords(p, A[n-1], B[n-1])\
01034 Acc2WordsBy2(d, p) \
01035 R[2*n-2] = LowWord(d); \
01036 R[2*n-1] = HighWord(d);
01037
01038 #define Bot_SaveAcc(k, i, j) \
01039 R[k] = LowWord(c); \
01040 word e = LowWord(d) + HighWord(c); \
01041 e += A[i] * B[j];
01042
01043 #define Bot_Acc(i, j) \
01044 e += A[i] * B[j];
01045
01046 #define Bot_End(n) \
01047 R[n-1] = e;
01048 #else
01049 #define Mul_Begin(n) \
01050 Declare2Words(p) \
01051 word c; \
01052 Declare2Words(d) \
01053 MultiplyWords(p, A[0], B[0]) \
01054 c = LowWord(p); \
01055 AssignWord(d, HighWord(p))
01056
01057 #define Mul_Acc(i, j) \
01058 MulAcc(c, d, A[i], B[j])
01059
01060 #define Mul_SaveAcc(k, i, j) \
01061 R[k] = c; \
01062 c = LowWord(d); \
01063 AssignWord(d, HighWord(d)) \
01064 MulAcc(c, d, A[i], B[j])
01065
01066 #define Mul_End(k, i) \
01067 R[k] = c; \
01068 MultiplyWords(p, A[i], B[i]) \
01069 Acc2WordsBy2(p, d) \
01070 R[k+1] = LowWord(p); \
01071 R[k+2] = HighWord(p);
01072
01073 #define Bot_SaveAcc(k, i, j) \
01074 R[k] = c; \
01075 c = LowWord(d); \
01076 c += A[i] * B[j];
01077
01078 #define Bot_Acc(i, j) \
01079 c += A[i] * B[j];
01080
01081 #define Bot_End(n) \
01082 R[n-1] = c;
01083 #endif
01084
01085 #define Squ_Begin(n) \
01086 Declare2Words(p) \
01087 word c; \
01088 Declare2Words(d) \
01089 Declare2Words(e) \
01090 MultiplyWords(p, A[0], A[0]) \
01091 R[0] = LowWord(p); \
01092 AssignWord(e, HighWord(p)) \
01093 MultiplyWords(p, A[0], A[1]) \
01094 c = LowWord(p); \
01095 AssignWord(d, HighWord(p)) \
01096 Squ_NonDiag \
01097
01098 #define Squ_NonDiag \
01099 Double3Words(c, d)
01100
01101 #define Squ_SaveAcc(k, i, j) \
01102 Acc3WordsBy2(c, d, e) \
01103 R[k] = c; \
01104 MultiplyWords(p, A[i], A[j]) \
01105 c = LowWord(p); \
01106 AssignWord(d, HighWord(p)) \
01107
01108 #define Squ_Acc(i, j) \
01109 MulAcc(c, d, A[i], A[j])
01110
01111 #define Squ_Diag(i) \
01112 Squ_NonDiag \
01113 MulAcc(c, d, A[i], A[i])
01114
01115 #define Squ_End(n) \
01116 Acc3WordsBy2(c, d, e) \
01117 R[2*n-3] = c; \
01118 MultiplyWords(p, A[n-1], A[n-1])\
01119 Acc2WordsBy2(p, e) \
01120 R[2*n-2] = LowWord(p); \
01121 R[2*n-1] = HighWord(p);
01122
01123 void Baseline_Multiply2(word *R, const word *A, const word *B)
01124 {
01125 Mul_2
01126 }
01127
01128 void Baseline_Multiply4(word *R, const word *A, const word *B)
01129 {
01130 Mul_4
01131 }
01132
01133 void Baseline_Multiply8(word *R, const word *A, const word *B)
01134 {
01135 Mul_8
01136 }
01137
01138 void Baseline_Square2(word *R, const word *A)
01139 {
01140 Squ_2
01141 }
01142
01143 void Baseline_Square4(word *R, const word *A)
01144 {
01145 Squ_4
01146 }
01147
01148 void Baseline_Square8(word *R, const word *A)
01149 {
01150 Squ_8
01151 }
01152
01153 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01154 {
01155 Bot_2
01156 }
01157
01158 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01159 {
01160 Bot_4
01161 }
01162
01163 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01164 {
01165 Bot_8
01166 }
01167
01168 #define Top_Begin(n) \
01169 Declare2Words(p) \
01170 word c; \
01171 Declare2Words(d) \
01172 MultiplyWords(p, A[0], B[n-2]);\
01173 AssignWord(d, HighWord(p));
01174
01175 #define Top_Acc(i, j) \
01176 MultiplyWords(p, A[i], B[j]);\
01177 Acc2WordsBy1(d, HighWord(p));
01178
01179 #define Top_SaveAcc0(i, j) \
01180 c = LowWord(d); \
01181 AssignWord(d, HighWord(d)) \
01182 MulAcc(c, d, A[i], B[j])
01183
01184 #define Top_SaveAcc1(i, j) \
01185 c = L<c; \
01186 Acc2WordsBy1(d, c); \
01187 c = LowWord(d); \
01188 AssignWord(d, HighWord(d)) \
01189 MulAcc(c, d, A[i], B[j])
01190
01191 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01192 {
01193 word T[4];
01194 Baseline_Multiply2(T, A, B);
01195 R[0] = T[2];
01196 R[1] = T[3];
01197 }
01198
01199 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01200 {
01201 Top_Begin(4)
01202 Top_Acc(1, 1) Top_Acc(2, 0) \
01203 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01204 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
01205 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01206 Mul_End(1, 3)
01207 }
01208
01209 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01210 {
01211 Top_Begin(8)
01212 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01213 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01214 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01215 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01216 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01217 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01218 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01219 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01220 Mul_End(5, 7)
01221 }
01222
01223 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
01224 void Baseline_Multiply16(word *R, const word *A, const word *B)
01225 {
01226 Mul_16
01227 }
01228
01229 void Baseline_Square16(word *R, const word *A)
01230 {
01231 Squ_16
01232 }
01233
01234 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01235 {
01236 Bot_16
01237 }
01238
01239 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01240 {
01241 Top_Begin(16)
01242 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01243 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01244 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01245 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01246 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01247 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01248 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01249 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01250 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01251 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01252 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01253 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01254 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01255 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01256 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01257 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01258 Mul_End(13, 15)
01259 }
01260 #endif
01261
01262
01263
01264 #if CRYPTOPP_INTEGER_SSE2
01265
01266 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01267
01268 #undef Mul_Begin
01269 #undef Mul_Acc
01270 #undef Top_Begin
01271 #undef Top_Acc
01272 #undef Squ_Acc
01273 #undef Squ_NonDiag
01274 #undef Squ_Diag
01275 #undef Squ_SaveAcc
01276 #undef Squ_Begin
01277 #undef Mul_SaveAcc
01278 #undef Bot_Acc
01279 #undef Bot_SaveAcc
01280 #undef Bot_End
01281 #undef Squ_End
01282 #undef Mul_End
01283
01284 #define SSE2_FinalSave(k) \
01285 AS2( psllq xmm5, 16) \
01286 AS2( paddq xmm4, xmm5) \
01287 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
01288
01289 #define SSE2_SaveShift(k) \
01290 AS2( movq xmm0, xmm6) \
01291 AS2( punpckhqdq xmm6, xmm0) \
01292 AS2( movq xmm1, xmm7) \
01293 AS2( punpckhqdq xmm7, xmm1) \
01294 AS2( paddd xmm6, xmm0) \
01295 AS2( pslldq xmm6, 4) \
01296 AS2( paddd xmm7, xmm1) \
01297 AS2( paddd xmm4, xmm6) \
01298 AS2( pslldq xmm7, 4) \
01299 AS2( movq xmm6, xmm4) \
01300 AS2( paddd xmm5, xmm7) \
01301 AS2( movq xmm7, xmm5) \
01302 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01303 AS2( psrlq xmm6, 16) \
01304 AS2( paddq xmm6, xmm7) \
01305 AS2( punpckhqdq xmm4, xmm0) \
01306 AS2( punpckhqdq xmm5, xmm0) \
01307 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
01308 AS2( psrlq xmm6, 3*16) \
01309 AS2( paddd xmm4, xmm6) \
01310
01311 #define Squ_SSE2_SaveShift(k) \
01312 AS2( movq xmm0, xmm6) \
01313 AS2( punpckhqdq xmm6, xmm0) \
01314 AS2( movq xmm1, xmm7) \
01315 AS2( punpckhqdq xmm7, xmm1) \
01316 AS2( paddd xmm6, xmm0) \
01317 AS2( pslldq xmm6, 4) \
01318 AS2( paddd xmm7, xmm1) \
01319 AS2( paddd xmm4, xmm6) \
01320 AS2( pslldq xmm7, 4) \
01321 AS2( movhlps xmm6, xmm4) \
01322 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01323 AS2( paddd xmm5, xmm7) \
01324 AS2( movhps QWORD PTR [esp+12], xmm5)\
01325 AS2( psrlq xmm4, 16) \
01326 AS2( paddq xmm4, xmm5) \
01327 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
01328 AS2( psrlq xmm4, 3*16) \
01329 AS2( paddd xmm4, xmm6) \
01330 AS2( movq QWORD PTR [esp+4], xmm4)\
01331
01332 #define SSE2_FirstMultiply(i) \
01333 AS2( movdqa xmm7, [esi+(i)*16])\
01334 AS2( movdqa xmm5, [edi-(i)*16])\
01335 AS2( pmuludq xmm5, xmm7) \
01336 AS2( movdqa xmm4, [ebx])\
01337 AS2( movdqa xmm6, xmm4) \
01338 AS2( pand xmm4, xmm5) \
01339 AS2( psrld xmm5, 16) \
01340 AS2( pmuludq xmm7, [edx-(i)*16])\
01341 AS2( pand xmm6, xmm7) \
01342 AS2( psrld xmm7, 16)
01343
01344 #define Squ_Begin(n) \
01345 SquPrologue \
01346 AS2( mov esi, esp)\
01347 AS2( and esp, 0xfffffff0)\
01348 AS2( lea edi, [esp-32*n])\
01349 AS2( sub esp, 32*n+16)\
01350 AS1( push esi)\
01351 AS2( mov esi, edi) \
01352 AS2( xor edx, edx) \
01353 ASL(1) \
01354 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01355 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01356 AS2( movdqa [edi+2*edx], xmm0) \
01357 AS2( psrlq xmm0, 32) \
01358 AS2( movdqa [edi+2*edx+16], xmm0) \
01359 AS2( movdqa [edi+16*n+2*edx], xmm1) \
01360 AS2( psrlq xmm1, 32) \
01361 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
01362 AS2( add edx, 16) \
01363 AS2( cmp edx, 8*(n)) \
01364 ASJ( jne, 1, b) \
01365 AS2( lea edx, [edi+16*n])\
01366 SSE2_FirstMultiply(0) \
01367
01368 #define Squ_Acc(i) \
01369 ASL(LSqu##i) \
01370 AS2( movdqa xmm1, [esi+(i)*16]) \
01371 AS2( movdqa xmm0, [edi-(i)*16]) \
01372 AS2( movdqa xmm2, [ebx]) \
01373 AS2( pmuludq xmm0, xmm1) \
01374 AS2( pmuludq xmm1, [edx-(i)*16]) \
01375 AS2( movdqa xmm3, xmm2) \
01376 AS2( pand xmm2, xmm0) \
01377 AS2( psrld xmm0, 16) \
01378 AS2( paddd xmm4, xmm2) \
01379 AS2( paddd xmm5, xmm0) \
01380 AS2( pand xmm3, xmm1) \
01381 AS2( psrld xmm1, 16) \
01382 AS2( paddd xmm6, xmm3) \
01383 AS2( paddd xmm7, xmm1) \
01384
01385 #define Squ_Acc1(i)
01386 #define Squ_Acc2(i) ASC(call, LSqu##i)
01387 #define Squ_Acc3(i) Squ_Acc2(i)
01388 #define Squ_Acc4(i) Squ_Acc2(i)
01389 #define Squ_Acc5(i) Squ_Acc2(i)
01390 #define Squ_Acc6(i) Squ_Acc2(i)
01391 #define Squ_Acc7(i) Squ_Acc2(i)
01392 #define Squ_Acc8(i) Squ_Acc2(i)
01393
01394 #define SSE2_End(E, n) \
01395 SSE2_SaveShift(2*(n)-3) \
01396 AS2( movdqa xmm7, [esi+16]) \
01397 AS2( movdqa xmm0, [edi]) \
01398 AS2( pmuludq xmm0, xmm7) \
01399 AS2( movdqa xmm2, [ebx]) \
01400 AS2( pmuludq xmm7, [edx]) \
01401 AS2( movdqa xmm6, xmm2) \
01402 AS2( pand xmm2, xmm0) \
01403 AS2( psrld xmm0, 16) \
01404 AS2( paddd xmm4, xmm2) \
01405 AS2( paddd xmm5, xmm0) \
01406 AS2( pand xmm6, xmm7) \
01407 AS2( psrld xmm7, 16) \
01408 SSE2_SaveShift(2*(n)-2) \
01409 SSE2_FinalSave(2*(n)-1) \
01410 AS1( pop esp)\
01411 E
01412
01413 #define Squ_End(n) SSE2_End(SquEpilogue, n)
01414 #define Mul_End(n) SSE2_End(MulEpilogue, n)
01415 #define Top_End(n) SSE2_End(TopEpilogue, n)
01416
01417 #define Squ_Column1(k, i) \
01418 Squ_SSE2_SaveShift(k) \
01419 AS2( add esi, 16) \
01420 SSE2_FirstMultiply(1)\
01421 Squ_Acc##i(i) \
01422 AS2( paddd xmm4, xmm4) \
01423 AS2( paddd xmm5, xmm5) \
01424 AS2( movdqa xmm3, [esi]) \
01425 AS2( movq xmm1, QWORD PTR [esi+8]) \
01426 AS2( pmuludq xmm1, xmm3) \
01427 AS2( pmuludq xmm3, xmm3) \
01428 AS2( movdqa xmm0, [ebx])\
01429 AS2( movdqa xmm2, xmm0) \
01430 AS2( pand xmm0, xmm1) \
01431 AS2( psrld xmm1, 16) \
01432 AS2( paddd xmm6, xmm0) \
01433 AS2( paddd xmm7, xmm1) \
01434 AS2( pand xmm2, xmm3) \
01435 AS2( psrld xmm3, 16) \
01436 AS2( paddd xmm6, xmm6) \
01437 AS2( paddd xmm7, xmm7) \
01438 AS2( paddd xmm4, xmm2) \
01439 AS2( paddd xmm5, xmm3) \
01440 AS2( movq xmm0, QWORD PTR [esp+4])\
01441 AS2( movq xmm1, QWORD PTR [esp+12])\
01442 AS2( paddd xmm4, xmm0)\
01443 AS2( paddd xmm5, xmm1)\
01444
01445 #define Squ_Column0(k, i) \
01446 Squ_SSE2_SaveShift(k) \
01447 AS2( add edi, 16) \
01448 AS2( add edx, 16) \
01449 SSE2_FirstMultiply(1)\
01450 Squ_Acc##i(i) \
01451 AS2( paddd xmm6, xmm6) \
01452 AS2( paddd xmm7, xmm7) \
01453 AS2( paddd xmm4, xmm4) \
01454 AS2( paddd xmm5, xmm5) \
01455 AS2( movq xmm0, QWORD PTR [esp+4])\
01456 AS2( movq xmm1, QWORD PTR [esp+12])\
01457 AS2( paddd xmm4, xmm0)\
01458 AS2( paddd xmm5, xmm1)\
01459
01460 #define SSE2_MulAdd45 \
01461 AS2( movdqa xmm7, [esi]) \
01462 AS2( movdqa xmm0, [edi]) \
01463 AS2( pmuludq xmm0, xmm7) \
01464 AS2( movdqa xmm2, [ebx]) \
01465 AS2( pmuludq xmm7, [edx]) \
01466 AS2( movdqa xmm6, xmm2) \
01467 AS2( pand xmm2, xmm0) \
01468 AS2( psrld xmm0, 16) \
01469 AS2( paddd xmm4, xmm2) \
01470 AS2( paddd xmm5, xmm0) \
01471 AS2( pand xmm6, xmm7) \
01472 AS2( psrld xmm7, 16)
01473
01474 #define Mul_Begin(n) \
01475 MulPrologue \
01476 AS2( mov esi, esp)\
01477 AS2( and esp, 0xfffffff0)\
01478 AS2( sub esp, 48*n+16)\
01479 AS1( push esi)\
01480 AS2( xor edx, edx) \
01481 ASL(1) \
01482 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01483 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01484 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01485 AS2( movdqa [esp+20+2*edx], xmm0) \
01486 AS2( psrlq xmm0, 32) \
01487 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01488 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01489 AS2( psrlq xmm1, 32) \
01490 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01491 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01492 AS2( psrlq xmm2, 32) \
01493 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01494 AS2( add edx, 16) \
01495 AS2( cmp edx, 8*(n)) \
01496 ASJ( jne, 1, b) \
01497 AS2( lea edi, [esp+20])\
01498 AS2( lea edx, [esp+20+16*n])\
01499 AS2( lea esi, [esp+20+32*n])\
01500 SSE2_FirstMultiply(0) \
01501
01502 #define Mul_Acc(i) \
01503 ASL(LMul##i) \
01504 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01505 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01506 AS2( movdqa xmm2, [ebx]) \
01507 AS2( pmuludq xmm0, xmm1) \
01508 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01509 AS2( movdqa xmm3, xmm2) \
01510 AS2( pand xmm2, xmm0) \
01511 AS2( psrld xmm0, 16) \
01512 AS2( paddd xmm4, xmm2) \
01513 AS2( paddd xmm5, xmm0) \
01514 AS2( pand xmm3, xmm1) \
01515 AS2( psrld xmm1, 16) \
01516 AS2( paddd xmm6, xmm3) \
01517 AS2( paddd xmm7, xmm1) \
01518
01519 #define Mul_Acc1(i)
01520 #define Mul_Acc2(i) ASC(call, LMul##i)
01521 #define Mul_Acc3(i) Mul_Acc2(i)
01522 #define Mul_Acc4(i) Mul_Acc2(i)
01523 #define Mul_Acc5(i) Mul_Acc2(i)
01524 #define Mul_Acc6(i) Mul_Acc2(i)
01525 #define Mul_Acc7(i) Mul_Acc2(i)
01526 #define Mul_Acc8(i) Mul_Acc2(i)
01527 #define Mul_Acc9(i) Mul_Acc2(i)
01528 #define Mul_Acc10(i) Mul_Acc2(i)
01529 #define Mul_Acc11(i) Mul_Acc2(i)
01530 #define Mul_Acc12(i) Mul_Acc2(i)
01531 #define Mul_Acc13(i) Mul_Acc2(i)
01532 #define Mul_Acc14(i) Mul_Acc2(i)
01533 #define Mul_Acc15(i) Mul_Acc2(i)
01534 #define Mul_Acc16(i) Mul_Acc2(i)
01535
01536 #define Mul_Column1(k, i) \
01537 SSE2_SaveShift(k) \
01538 AS2( add esi, 16) \
01539 SSE2_MulAdd45\
01540 Mul_Acc##i(i) \
01541
01542 #define Mul_Column0(k, i) \
01543 SSE2_SaveShift(k) \
01544 AS2( add edi, 16) \
01545 AS2( add edx, 16) \
01546 SSE2_MulAdd45\
01547 Mul_Acc##i(i) \
01548
01549 #define Bot_Acc(i) \
01550 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01551 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01552 AS2( pmuludq xmm0, xmm1) \
01553 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01554 AS2( paddq xmm4, xmm0) \
01555 AS2( paddd xmm6, xmm1)
01556
01557 #define Bot_SaveAcc(k) \
01558 SSE2_SaveShift(k) \
01559 AS2( add edi, 16) \
01560 AS2( add edx, 16) \
01561 AS2( movdqa xmm6, [esi]) \
01562 AS2( movdqa xmm0, [edi]) \
01563 AS2( pmuludq xmm0, xmm6) \
01564 AS2( paddq xmm4, xmm0) \
01565 AS2( psllq xmm5, 16) \
01566 AS2( paddq xmm4, xmm5) \
01567 AS2( pmuludq xmm6, [edx])
01568
01569 #define Bot_End(n) \
01570 AS2( movhlps xmm7, xmm6) \
01571 AS2( paddd xmm6, xmm7) \
01572 AS2( psllq xmm6, 32) \
01573 AS2( paddd xmm4, xmm6) \
01574 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
01575 AS1( pop esp)\
01576 MulEpilogue
01577
01578 #define Top_Begin(n) \
01579 TopPrologue \
01580 AS2( mov edx, esp)\
01581 AS2( and esp, 0xfffffff0)\
01582 AS2( sub esp, 48*n+16)\
01583 AS1( push edx)\
01584 AS2( xor edx, edx) \
01585 ASL(1) \
01586 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01587 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01588 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01589 AS2( movdqa [esp+20+2*edx], xmm0) \
01590 AS2( psrlq xmm0, 32) \
01591 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01592 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01593 AS2( psrlq xmm1, 32) \
01594 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01595 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01596 AS2( psrlq xmm2, 32) \
01597 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01598 AS2( add edx, 16) \
01599 AS2( cmp edx, 8*(n)) \
01600 ASJ( jne, 1, b) \
01601 AS2( mov eax, esi) \
01602 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
01603 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
01604 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
01605 AS2( pxor xmm4, xmm4)\
01606 AS2( pxor xmm5, xmm5)
01607
01608 #define Top_Acc(i) \
01609 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
01610 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01611 AS2( psrlq xmm0, 48) \
01612 AS2( paddd xmm5, xmm0)\
01613
01614 #define Top_Column0(i) \
01615 AS2( psllq xmm5, 32) \
01616 AS2( add edi, 16) \
01617 AS2( add edx, 16) \
01618 SSE2_MulAdd45\
01619 Mul_Acc##i(i) \
01620
01621 #define Top_Column1(i) \
01622 SSE2_SaveShift(0) \
01623 AS2( add esi, 16) \
01624 SSE2_MulAdd45\
01625 Mul_Acc##i(i) \
01626 AS2( shr eax, 16) \
01627 AS2( movd xmm0, eax)\
01628 AS2( movd xmm1, [ecx+4])\
01629 AS2( psrld xmm1, 16)\
01630 AS2( pcmpgtd xmm1, xmm0)\
01631 AS2( psrld xmm1, 31)\
01632 AS2( paddd xmm4, xmm1)\
01633
01634 void SSE2_Square4(word *C, const word *A)
01635 {
01636 Squ_Begin(2)
01637 Squ_Column0(0, 1)
01638 Squ_End(2)
01639 }
01640
01641 void SSE2_Square8(word *C, const word *A)
01642 {
01643 Squ_Begin(4)
01644 #ifndef __GNUC__
01645 ASJ( jmp, 0, f)
01646 Squ_Acc(2)
01647 AS1( ret) ASL(0)
01648 #endif
01649 Squ_Column0(0, 1)
01650 Squ_Column1(1, 1)
01651 Squ_Column0(2, 2)
01652 Squ_Column1(3, 1)
01653 Squ_Column0(4, 1)
01654 Squ_End(4)
01655 }
01656
01657 void SSE2_Square16(word *C, const word *A)
01658 {
01659 Squ_Begin(8)
01660 #ifndef __GNUC__
01661 ASJ( jmp, 0, f)
01662 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01663 AS1( ret) ASL(0)
01664 #endif
01665 Squ_Column0(0, 1)
01666 Squ_Column1(1, 1)
01667 Squ_Column0(2, 2)
01668 Squ_Column1(3, 2)
01669 Squ_Column0(4, 3)
01670 Squ_Column1(5, 3)
01671 Squ_Column0(6, 4)
01672 Squ_Column1(7, 3)
01673 Squ_Column0(8, 3)
01674 Squ_Column1(9, 2)
01675 Squ_Column0(10, 2)
01676 Squ_Column1(11, 1)
01677 Squ_Column0(12, 1)
01678 Squ_End(8)
01679 }
01680
01681 void SSE2_Square32(word *C, const word *A)
01682 {
01683 Squ_Begin(16)
01684 ASJ( jmp, 0, f)
01685 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01686 AS1( ret) ASL(0)
01687 Squ_Column0(0, 1)
01688 Squ_Column1(1, 1)
01689 Squ_Column0(2, 2)
01690 Squ_Column1(3, 2)
01691 Squ_Column0(4, 3)
01692 Squ_Column1(5, 3)
01693 Squ_Column0(6, 4)
01694 Squ_Column1(7, 4)
01695 Squ_Column0(8, 5)
01696 Squ_Column1(9, 5)
01697 Squ_Column0(10, 6)
01698 Squ_Column1(11, 6)
01699 Squ_Column0(12, 7)
01700 Squ_Column1(13, 7)
01701 Squ_Column0(14, 8)
01702 Squ_Column1(15, 7)
01703 Squ_Column0(16, 7)
01704 Squ_Column1(17, 6)
01705 Squ_Column0(18, 6)
01706 Squ_Column1(19, 5)
01707 Squ_Column0(20, 5)
01708 Squ_Column1(21, 4)
01709 Squ_Column0(22, 4)
01710 Squ_Column1(23, 3)
01711 Squ_Column0(24, 3)
01712 Squ_Column1(25, 2)
01713 Squ_Column0(26, 2)
01714 Squ_Column1(27, 1)
01715 Squ_Column0(28, 1)
01716 Squ_End(16)
01717 }
01718
01719 void SSE2_Multiply4(word *C, const word *A, const word *B)
01720 {
01721 Mul_Begin(2)
01722 #ifndef __GNUC__
01723 ASJ( jmp, 0, f)
01724 Mul_Acc(2)
01725 AS1( ret) ASL(0)
01726 #endif
01727 Mul_Column0(0, 2)
01728 Mul_End(2)
01729 }
01730
01731 void SSE2_Multiply8(word *C, const word *A, const word *B)
01732 {
01733 Mul_Begin(4)
01734 #ifndef __GNUC__
01735 ASJ( jmp, 0, f)
01736 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01737 AS1( ret) ASL(0)
01738 #endif
01739 Mul_Column0(0, 2)
01740 Mul_Column1(1, 3)
01741 Mul_Column0(2, 4)
01742 Mul_Column1(3, 3)
01743 Mul_Column0(4, 2)
01744 Mul_End(4)
01745 }
01746
01747 void SSE2_Multiply16(word *C, const word *A, const word *B)
01748 {
01749 Mul_Begin(8)
01750 #ifndef __GNUC__
01751 ASJ( jmp, 0, f)
01752 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01753 AS1( ret) ASL(0)
01754 #endif
01755 Mul_Column0(0, 2)
01756 Mul_Column1(1, 3)
01757 Mul_Column0(2, 4)
01758 Mul_Column1(3, 5)
01759 Mul_Column0(4, 6)
01760 Mul_Column1(5, 7)
01761 Mul_Column0(6, 8)
01762 Mul_Column1(7, 7)
01763 Mul_Column0(8, 6)
01764 Mul_Column1(9, 5)
01765 Mul_Column0(10, 4)
01766 Mul_Column1(11, 3)
01767 Mul_Column0(12, 2)
01768 Mul_End(8)
01769 }
01770
01771 void SSE2_Multiply32(word *C, const word *A, const word *B)
01772 {
01773 Mul_Begin(16)
01774 ASJ( jmp, 0, f)
01775 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01776 AS1( ret) ASL(0)
01777 Mul_Column0(0, 2)
01778 Mul_Column1(1, 3)
01779 Mul_Column0(2, 4)
01780 Mul_Column1(3, 5)
01781 Mul_Column0(4, 6)
01782 Mul_Column1(5, 7)
01783 Mul_Column0(6, 8)
01784 Mul_Column1(7, 9)
01785 Mul_Column0(8, 10)
01786 Mul_Column1(9, 11)
01787 Mul_Column0(10, 12)
01788 Mul_Column1(11, 13)
01789 Mul_Column0(12, 14)
01790 Mul_Column1(13, 15)
01791 Mul_Column0(14, 16)
01792 Mul_Column1(15, 15)
01793 Mul_Column0(16, 14)
01794 Mul_Column1(17, 13)
01795 Mul_Column0(18, 12)
01796 Mul_Column1(19, 11)
01797 Mul_Column0(20, 10)
01798 Mul_Column1(21, 9)
01799 Mul_Column0(22, 8)
01800 Mul_Column1(23, 7)
01801 Mul_Column0(24, 6)
01802 Mul_Column1(25, 5)
01803 Mul_Column0(26, 4)
01804 Mul_Column1(27, 3)
01805 Mul_Column0(28, 2)
01806 Mul_End(16)
01807 }
01808
01809 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01810 {
01811 Mul_Begin(2)
01812 Bot_SaveAcc(0) Bot_Acc(2)
01813 Bot_End(2)
01814 }
01815
01816 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01817 {
01818 Mul_Begin(4)
01819 #ifndef __GNUC__
01820 ASJ( jmp, 0, f)
01821 Mul_Acc(3) Mul_Acc(2)
01822 AS1( ret) ASL(0)
01823 #endif
01824 Mul_Column0(0, 2)
01825 Mul_Column1(1, 3)
01826 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01827 Bot_End(4)
01828 }
01829
01830 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01831 {
01832 Mul_Begin(8)
01833 #ifndef __GNUC__
01834 ASJ( jmp, 0, f)
01835 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01836 AS1( ret) ASL(0)
01837 #endif
01838 Mul_Column0(0, 2)
01839 Mul_Column1(1, 3)
01840 Mul_Column0(2, 4)
01841 Mul_Column1(3, 5)
01842 Mul_Column0(4, 6)
01843 Mul_Column1(5, 7)
01844 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01845 Bot_End(8)
01846 }
01847
01848 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01849 {
01850 Mul_Begin(16)
01851 #ifndef __GNUC__
01852 ASJ( jmp, 0, f)
01853 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01854 AS1( ret) ASL(0)
01855 #endif
01856 Mul_Column0(0, 2)
01857 Mul_Column1(1, 3)
01858 Mul_Column0(2, 4)
01859 Mul_Column1(3, 5)
01860 Mul_Column0(4, 6)
01861 Mul_Column1(5, 7)
01862 Mul_Column0(6, 8)
01863 Mul_Column1(7, 9)
01864 Mul_Column0(8, 10)
01865 Mul_Column1(9, 11)
01866 Mul_Column0(10, 12)
01867 Mul_Column1(11, 13)
01868 Mul_Column0(12, 14)
01869 Mul_Column1(13, 15)
01870 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01871 Bot_End(16)
01872 }
01873
01874 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01875 {
01876 Top_Begin(4)
01877 Top_Acc(3) Top_Acc(2) Top_Acc(1)
01878 #ifndef __GNUC__
01879 ASJ( jmp, 0, f)
01880 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01881 AS1( ret) ASL(0)
01882 #endif
01883 Top_Column0(4)
01884 Top_Column1(3)
01885 Mul_Column0(0, 2)
01886 Top_End(2)
01887 }
01888
01889 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01890 {
01891 Top_Begin(8)
01892 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01893 #ifndef __GNUC__
01894 ASJ( jmp, 0, f)
01895 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01896 AS1( ret) ASL(0)
01897 #endif
01898 Top_Column0(8)
01899 Top_Column1(7)
01900 Mul_Column0(0, 6)
01901 Mul_Column1(1, 5)
01902 Mul_Column0(2, 4)
01903 Mul_Column1(3, 3)
01904 Mul_Column0(4, 2)
01905 Top_End(4)
01906 }
01907
01908 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01909 {
01910 Top_Begin(16)
01911 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01912 #ifndef __GNUC__
01913 ASJ( jmp, 0, f)
01914 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01915 AS1( ret) ASL(0)
01916 #endif
01917 Top_Column0(16)
01918 Top_Column1(15)
01919 Mul_Column0(0, 14)
01920 Mul_Column1(1, 13)
01921 Mul_Column0(2, 12)
01922 Mul_Column1(3, 11)
01923 Mul_Column0(4, 10)
01924 Mul_Column1(5, 9)
01925 Mul_Column0(6, 8)
01926 Mul_Column1(7, 7)
01927 Mul_Column0(8, 6)
01928 Mul_Column1(9, 5)
01929 Mul_Column0(10, 4)
01930 Mul_Column1(11, 3)
01931 Mul_Column0(12, 2)
01932 Top_End(8)
01933 }
01934
01935 #endif // #if CRYPTOPP_INTEGER_SSE2
01936
01937
01938
01939 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01940 typedef void (* PMul)(word *C, const word *A, const word *B);
01941 typedef void (* PSqu)(word *C, const word *A);
01942 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01943
01944 #if CRYPTOPP_INTEGER_SSE2
01945 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01946 static size_t s_recursionLimit = 8;
01947 #else
01948 static const size_t s_recursionLimit = 16;
01949 #endif
01950
01951 static PMul s_pMul[9], s_pBot[9];
01952 static PSqu s_pSqu[9];
01953 static PMulTop s_pTop[9];
01954
01955 static void SetFunctionPointers()
01956 {
01957 s_pMul[0] = &Baseline_Multiply2;
01958 s_pBot[0] = &Baseline_MultiplyBottom2;
01959 s_pSqu[0] = &Baseline_Square2;
01960 s_pTop[0] = &Baseline_MultiplyTop2;
01961 s_pTop[1] = &Baseline_MultiplyTop4;
01962
01963 #if CRYPTOPP_INTEGER_SSE2
01964 if (HasSSE2())
01965 {
01966 #if _MSC_VER != 1200 || defined(NDEBUG)
01967 if (IsP4())
01968 {
01969 s_pAdd = &SSE2_Add;
01970 s_pSub = &SSE2_Sub;
01971 }
01972 #endif
01973
01974 s_recursionLimit = 32;
01975
01976 s_pMul[1] = &SSE2_Multiply4;
01977 s_pMul[2] = &SSE2_Multiply8;
01978 s_pMul[4] = &SSE2_Multiply16;
01979 s_pMul[8] = &SSE2_Multiply32;
01980
01981 s_pBot[1] = &SSE2_MultiplyBottom4;
01982 s_pBot[2] = &SSE2_MultiplyBottom8;
01983 s_pBot[4] = &SSE2_MultiplyBottom16;
01984 s_pBot[8] = &SSE2_MultiplyBottom32;
01985
01986 s_pSqu[1] = &SSE2_Square4;
01987 s_pSqu[2] = &SSE2_Square8;
01988 s_pSqu[4] = &SSE2_Square16;
01989 s_pSqu[8] = &SSE2_Square32;
01990
01991 s_pTop[2] = &SSE2_MultiplyTop8;
01992 s_pTop[4] = &SSE2_MultiplyTop16;
01993 s_pTop[8] = &SSE2_MultiplyTop32;
01994 }
01995 else
01996 #endif
01997 {
01998 s_pMul[1] = &Baseline_Multiply4;
01999 s_pMul[2] = &Baseline_Multiply8;
02000
02001 s_pBot[1] = &Baseline_MultiplyBottom4;
02002 s_pBot[2] = &Baseline_MultiplyBottom8;
02003
02004 s_pSqu[1] = &Baseline_Square4;
02005 s_pSqu[2] = &Baseline_Square8;
02006
02007 s_pTop[2] = &Baseline_MultiplyTop8;
02008
02009 #if !CRYPTOPP_INTEGER_SSE2
02010 s_pMul[4] = &Baseline_Multiply16;
02011 s_pBot[4] = &Baseline_MultiplyBottom16;
02012 s_pSqu[4] = &Baseline_Square16;
02013 s_pTop[4] = &Baseline_MultiplyTop16;
02014 #endif
02015 }
02016 }
02017
02018 inline int Add(word *C, const word *A, const word *B, size_t N)
02019 {
02020 #if CRYPTOPP_INTEGER_SSE2
02021 return s_pAdd(N, C, A, B);
02022 #else
02023 return Baseline_Add(N, C, A, B);
02024 #endif
02025 }
02026
02027 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02028 {
02029 #if CRYPTOPP_INTEGER_SSE2
02030 return s_pSub(N, C, A, B);
02031 #else
02032 return Baseline_Sub(N, C, A, B);
02033 #endif
02034 }
02035
02036
02037
02038
02039 #define A0 A
02040 #define A1 (A+N2)
02041 #define B0 B
02042 #define B1 (B+N2)
02043
02044 #define T0 T
02045 #define T1 (T+N2)
02046 #define T2 (T+N)
02047 #define T3 (T+N+N2)
02048
02049 #define R0 R
02050 #define R1 (R+N2)
02051 #define R2 (R+N)
02052 #define R3 (R+N+N2)
02053
02054
02055
02056
02057
02058
02059 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02060 {
02061 assert(N>=2 && N%2==0);
02062
02063 if (N <= s_recursionLimit)
02064 s_pMul[N/4](R, A, B);
02065 else
02066 {
02067 const size_t N2 = N/2;
02068
02069 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02070 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02071
02072 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02073 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02074
02075 RecursiveMultiply(R2, T2, A1, B1, N2);
02076 RecursiveMultiply(T0, T2, R0, R1, N2);
02077 RecursiveMultiply(R0, T2, A0, B0, N2);
02078
02079
02080
02081 int c2 = Add(R2, R2, R1, N2);
02082 int c3 = c2;
02083 c2 += Add(R1, R2, R0, N2);
02084 c3 += Add(R2, R2, R3, N2);
02085
02086 if (AN2 == BN2)
02087 c3 -= Subtract(R1, R1, T0, N);
02088 else
02089 c3 += Add(R1, R1, T0, N);
02090
02091 c3 += Increment(R2, N2, c2);
02092 assert (c3 >= 0 && c3 <= 2);
02093 Increment(R3, N2, c3);
02094 }
02095 }
02096
02097
02098
02099
02100
02101 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02102 {
02103 assert(N && N%2==0);
02104
02105 if (N <= s_recursionLimit)
02106 s_pSqu[N/4](R, A);
02107 else
02108 {
02109 const size_t N2 = N/2;
02110
02111 RecursiveSquare(R0, T2, A0, N2);
02112 RecursiveSquare(R2, T2, A1, N2);
02113 RecursiveMultiply(T0, T2, A0, A1, N2);
02114
02115 int carry = Add(R1, R1, T0, N);
02116 carry += Add(R1, R1, T0, N);
02117 Increment(R3, N2, carry);
02118 }
02119 }
02120
02121
02122
02123
02124
02125
02126 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02127 {
02128 assert(N>=2 && N%2==0);
02129
02130 if (N <= s_recursionLimit)
02131 s_pBot[N/4](R, A, B);
02132 else
02133 {
02134 const size_t N2 = N/2;
02135
02136 RecursiveMultiply(R, T, A0, B0, N2);
02137 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02138 Add(R1, R1, T0, N2);
02139 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02140 Add(R1, R1, T0, N2);
02141 }
02142 }
02143
02144
02145
02146
02147
02148
02149
02150 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02151 {
02152 assert(N>=2 && N%2==0);
02153
02154 if (N <= s_recursionLimit)
02155 s_pTop[N/4](R, A, B, L[N-1]);
02156 else
02157 {
02158 const size_t N2 = N/2;
02159
02160 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02161 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02162
02163 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02164 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02165
02166 RecursiveMultiply(T0, T2, R0, R1, N2);
02167 RecursiveMultiply(R0, T2, A1, B1, N2);
02168
02169
02170
02171 int t, c3;
02172 int c2 = Subtract(T2, L+N2, L, N2);
02173
02174 if (AN2 == BN2)
02175 {
02176 c2 -= Add(T2, T2, T0, N2);
02177 t = (Compare(T2, R0, N2) == -1);
02178 c3 = t - Subtract(T2, T2, T1, N2);
02179 }
02180 else
02181 {
02182 c2 += Subtract(T2, T2, T0, N2);
02183 t = (Compare(T2, R0, N2) == -1);
02184 c3 = t + Add(T2, T2, T1, N2);
02185 }
02186
02187 c2 += t;
02188 if (c2 >= 0)
02189 c3 += Increment(T2, N2, c2);
02190 else
02191 c3 -= Decrement(T2, N2, -c2);
02192 c3 += Add(R0, T2, R1, N2);
02193
02194 assert (c3 >= 0 && c3 <= 2);
02195 Increment(R1, N2, c3);
02196 }
02197 }
02198
02199 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02200 {
02201 RecursiveMultiply(R, T, A, B, N);
02202 }
02203
02204 inline void Square(word *R, word *T, const word *A, size_t N)
02205 {
02206 RecursiveSquare(R, T, A, N);
02207 }
02208
02209 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02210 {
02211 RecursiveMultiplyBottom(R, T, A, B, N);
02212 }
02213
02214
02215
02216
02217
02218
02219 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02220 {
02221 if (NA == NB)
02222 {
02223 if (A == B)
02224 Square(R, T, A, NA);
02225 else
02226 Multiply(R, T, A, B, NA);
02227
02228 return;
02229 }
02230
02231 if (NA > NB)
02232 {
02233 std::swap(A, B);
02234 std::swap(NA, NB);
02235 }
02236
02237 assert(NB % NA == 0);
02238
02239 if (NA==2 && !A[1])
02240 {
02241 switch (A[0])
02242 {
02243 case 0:
02244 SetWords(R, 0, NB+2);
02245 return;
02246 case 1:
02247 CopyWords(R, B, NB);
02248 R[NB] = R[NB+1] = 0;
02249 return;
02250 default:
02251 R[NB] = LinearMultiply(R, B, A[0], NB);
02252 R[NB+1] = 0;
02253 return;
02254 }
02255 }
02256
02257 size_t i;
02258 if ((NB/NA)%2 == 0)
02259 {
02260 Multiply(R, T, A, B, NA);
02261 CopyWords(T+2*NA, R+NA, NA);
02262
02263 for (i=2*NA; i<NB; i+=2*NA)
02264 Multiply(T+NA+i, T, A, B+i, NA);
02265 for (i=NA; i<NB; i+=2*NA)
02266 Multiply(R+i, T, A, B+i, NA);
02267 }
02268 else
02269 {
02270 for (i=0; i<NB; i+=2*NA)
02271 Multiply(R+i, T, A, B+i, NA);
02272 for (i=NA; i<NB; i+=2*NA)
02273 Multiply(T+NA+i, T, A, B+i, NA);
02274 }
02275
02276 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02277 Increment(R+NB, NA);
02278 }
02279
02280
02281
02282
02283
02284 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02285 {
02286 if (N==2)
02287 {
02288 T[0] = AtomicInverseModPower2(A[0]);
02289 T[1] = 0;
02290 s_pBot[0](T+2, T, A);
02291 TwosComplement(T+2, 2);
02292 Increment(T+2, 2, 2);
02293 s_pBot[0](R, T, T+2);
02294 }
02295 else
02296 {
02297 const size_t N2 = N/2;
02298 RecursiveInverseModPower2(R0, T0, A0, N2);
02299 T0[0] = 1;
02300 SetWords(T0+1, 0, N2-1);
02301 MultiplyTop(R1, T1, T0, R0, A0, N2);
02302 MultiplyBottom(T0, T1, R0, A1, N2);
02303 Add(T0, R1, T0, N2);
02304 TwosComplement(T0, N2);
02305 MultiplyBottom(R1, T1, R0, T0, N2);
02306 }
02307 }
02308
02309
02310
02311
02312
02313
02314
02315 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02316 {
02317 #if 1
02318 MultiplyBottom(R, T, X, U, N);
02319 MultiplyTop(T, T+N, X, R, M, N);
02320 word borrow = Subtract(T, X+N, T, N);
02321
02322 word carry = Add(T+N, T, M, N);
02323 assert(carry | !borrow);
02324 CopyWords(R, T + ((0-borrow) & N), N);
02325 #elif 0
02326 const word u = 0-U[0];
02327 Declare2Words(p)
02328 for (size_t i=0; i<N; i++)
02329 {
02330 const word t = u * X[i];
02331 word c = 0;
02332 for (size_t j=0; j<N; j+=2)
02333 {
02334 MultiplyWords(p, t, M[j]);
02335 Acc2WordsBy1(p, X[i+j]);
02336 Acc2WordsBy1(p, c);
02337 X[i+j] = LowWord(p);
02338 c = HighWord(p);
02339 MultiplyWords(p, t, M[j+1]);
02340 Acc2WordsBy1(p, X[i+j+1]);
02341 Acc2WordsBy1(p, c);
02342 X[i+j+1] = LowWord(p);
02343 c = HighWord(p);
02344 }
02345
02346 if (Increment(X+N+i, N-i, c))
02347 while (!Subtract(X+N, X+N, M, N)) {}
02348 }
02349
02350 memcpy(R, X+N, N*WORD_SIZE);
02351 #else
02352 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02353 for (size_t i=0; i<N; i++)
02354 {
02355 __m64 t = _mm_cvtsi32_si64(X[i]);
02356 t = _mm_mul_su32(t, u);
02357 __m64 c = _mm_setzero_si64();
02358 for (size_t j=0; j<N; j+=2)
02359 {
02360 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02361 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02362 c = _mm_add_si64(c, p);
02363 X[i+j] = _mm_cvtsi64_si32(c);
02364 c = _mm_srli_si64(c, 32);
02365 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02366 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02367 c = _mm_add_si64(c, p);
02368 X[i+j+1] = _mm_cvtsi64_si32(c);
02369 c = _mm_srli_si64(c, 32);
02370 }
02371
02372 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02373 while (!Subtract(X+N, X+N, M, N)) {}
02374 }
02375
02376 memcpy(R, X+N, N*WORD_SIZE);
02377 _mm_empty();
02378 #endif
02379 }
02380
02381
02382
02383
02384
02385
02386
02387
02388 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02389 {
02390 assert(N%2==0 && N>=4);
02391
02392 #define M0 M
02393 #define M1 (M+N2)
02394 #define V0 V
02395 #define V1 (V+N2)
02396
02397 #define X0 X
02398 #define X1 (X+N2)
02399 #define X2 (X+N)
02400 #define X3 (X+N+N2)
02401
02402 const size_t N2 = N/2;
02403 Multiply(T0, T2, V0, X3, N2);
02404 int c2 = Add(T0, T0, X0, N);
02405 MultiplyBottom(T3, T2, T0, U, N2);
02406 MultiplyTop(T2, R, T0, T3, M0, N2);
02407 c2 -= Subtract(T2, T1, T2, N2);
02408 Multiply(T0, R, T3, M1, N2);
02409 c2 -= Subtract(T0, T2, T0, N2);
02410 int c3 = -(int)Subtract(T1, X2, T1, N2);
02411 Multiply(R0, T2, V1, X3, N2);
02412 c3 += Add(R, R, T, N);
02413
02414 if (c2>0)
02415 c3 += Increment(R1, N2);
02416 else if (c2<0)
02417 c3 -= Decrement(R1, N2, -c2);
02418
02419 assert(c3>=-1 && c3<=1);
02420 if (c3>0)
02421 Subtract(R, R, M, N);
02422 else if (c3<0)
02423 Add(R, R, M, N);
02424
02425 #undef M0
02426 #undef M1
02427 #undef V0
02428 #undef V1
02429
02430 #undef X0
02431 #undef X1
02432 #undef X2
02433 #undef X3
02434 }
02435
02436 #undef A0
02437 #undef A1
02438 #undef B0
02439 #undef B1
02440
02441 #undef T0
02442 #undef T1
02443 #undef T2
02444 #undef T3
02445
02446 #undef R0
02447 #undef R1
02448 #undef R2
02449 #undef R3
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501
02502
02503
02504
02505
02506
02507
02508
02509
02510
02511
02512
02513
02514
02515 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02516 {
02517 word T[4];
02518 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02519 Q[0] = q.GetLowHalf();
02520 Q[1] = q.GetHighHalf();
02521
02522 #ifndef NDEBUG
02523 if (B[0] || B[1])
02524 {
02525
02526 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02527 word P[4];
02528 s_pMul[0](P, Q, B);
02529 Add(P, P, T, 4);
02530 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02531 }
02532 #endif
02533 }
02534
02535
02536 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02537 {
02538 assert(N && N%2==0);
02539
02540 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02541
02542 word borrow = Subtract(R, R, T, N+2);
02543 assert(!borrow && !R[N+1]);
02544
02545 while (R[N] || Compare(R, B, N) >= 0)
02546 {
02547 R[N] -= Subtract(R, R, B, N);
02548 Q[1] += (++Q[0]==0);
02549 assert(Q[0] || Q[1]);
02550 }
02551 }
02552
02553
02554
02555
02556
02557
02558
02559 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02560 {
02561 assert(NA && NB && NA%2==0 && NB%2==0);
02562 assert(B[NB-1] || B[NB-2]);
02563 assert(NB <= NA);
02564
02565
02566 word *const TA=T;
02567 word *const TB=T+NA+2;
02568 word *const TP=T+NA+2+NB;
02569
02570
02571 unsigned shiftWords = (B[NB-1]==0);
02572 TB[0] = TB[NB-1] = 0;
02573 CopyWords(TB+shiftWords, B, NB-shiftWords);
02574 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02575 assert(shiftBits < WORD_BITS);
02576 ShiftWordsLeftByBits(TB, NB, shiftBits);
02577
02578
02579 TA[0] = TA[NA] = TA[NA+1] = 0;
02580 CopyWords(TA+shiftWords, A, NA);
02581 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02582
02583 if (TA[NA+1]==0 && TA[NA] <= 1)
02584 {
02585 Q[NA-NB+1] = Q[NA-NB] = 0;
02586 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02587 {
02588 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02589 ++Q[NA-NB];
02590 }
02591 }
02592 else
02593 {
02594 NA+=2;
02595 assert(Compare(TA+NA-NB, TB, NB) < 0);
02596 }
02597
02598 word BT[2];
02599 BT[0] = TB[NB-2] + 1;
02600 BT[1] = TB[NB-1] + (BT[0]==0);
02601
02602
02603 for (size_t i=NA-2; i>=NB; i-=2)
02604 {
02605 AtomicDivide(Q+i-NB, TA+i-2, BT);
02606 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02607 }
02608
02609
02610 CopyWords(R, TA+shiftWords, NB);
02611 ShiftWordsRightByBits(R, NB, shiftBits);
02612 }
02613
02614 static inline size_t EvenWordCount(const word *X, size_t N)
02615 {
02616 while (N && X[N-2]==0 && X[N-1]==0)
02617 N-=2;
02618 return N;
02619 }
02620
02621
02622
02623
02624
02625
02626
02627 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02628 {
02629 assert(NA<=N && N && N%2==0);
02630
02631 word *b = T;
02632 word *c = T+N;
02633 word *f = T+2*N;
02634 word *g = T+3*N;
02635 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02636 unsigned int k=0, s=0;
02637
02638 SetWords(T, 0, 3*N);
02639 b[0]=1;
02640 CopyWords(f, A, NA);
02641 CopyWords(g, M, N);
02642
02643 while (1)
02644 {
02645 word t=f[0];
02646 while (!t)
02647 {
02648 if (EvenWordCount(f, fgLen)==0)
02649 {
02650 SetWords(R, 0, N);
02651 return 0;
02652 }
02653
02654 ShiftWordsRightByWords(f, fgLen, 1);
02655 if (c[bcLen-1]) bcLen+=2;
02656 assert(bcLen <= N);
02657 ShiftWordsLeftByWords(c, bcLen, 1);
02658 k+=WORD_BITS;
02659 t=f[0];
02660 }
02661
02662 unsigned int i=0;
02663 while (t%2 == 0)
02664 {
02665 t>>=1;
02666 i++;
02667 }
02668 k+=i;
02669
02670 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02671 {
02672 if (s%2==0)
02673 CopyWords(R, b, N);
02674 else
02675 Subtract(R, M, b, N);
02676 return k;
02677 }
02678
02679 ShiftWordsRightByBits(f, fgLen, i);
02680 t=ShiftWordsLeftByBits(c, bcLen, i);
02681 if (t)
02682 {
02683 c[bcLen] = t;
02684 bcLen+=2;
02685 assert(bcLen <= N);
02686 }
02687
02688 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02689 fgLen-=2;
02690
02691 if (Compare(f, g, fgLen)==-1)
02692 {
02693 std::swap(f, g);
02694 std::swap(b, c);
02695 s++;
02696 }
02697
02698 Subtract(f, f, g, fgLen);
02699
02700 if (Add(b, b, c, bcLen))
02701 {
02702 b[bcLen] = 1;
02703 bcLen+=2;
02704 assert(bcLen <= N);
02705 }
02706 }
02707 }
02708
02709
02710
02711
02712
02713 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02714 {
02715 CopyWords(R, A, N);
02716
02717 while (k--)
02718 {
02719 if (R[0]%2==0)
02720 ShiftWordsRightByBits(R, N, 1);
02721 else
02722 {
02723 word carry = Add(R, R, M, N);
02724 ShiftWordsRightByBits(R, N, 1);
02725 R[N-1] += carry<<(WORD_BITS-1);
02726 }
02727 }
02728 }
02729
02730
02731
02732
02733
02734 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02735 {
02736 CopyWords(R, A, N);
02737
02738 while (k--)
02739 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02740 Subtract(R, R, M, N);
02741 }
02742
02743
02744
02745 InitializeInteger::InitializeInteger()
02746 {
02747 if (!g_pAssignIntToInteger)
02748 {
02749 SetFunctionPointers();
02750 g_pAssignIntToInteger = AssignIntToInteger;
02751 }
02752 }
02753
02754 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02755
02756 static inline size_t RoundupSize(size_t n)
02757 {
02758 if (n<=8)
02759 return RoundupSizeTable[n];
02760 else if (n<=16)
02761 return 16;
02762 else if (n<=32)
02763 return 32;
02764 else if (n<=64)
02765 return 64;
02766 else return size_t(1) << BitPrecision(n-1);
02767 }
02768
02769 Integer::Integer()
02770 : reg(2), sign(POSITIVE)
02771 {
02772 reg[0] = reg[1] = 0;
02773 }
02774
02775 Integer::Integer(const Integer& t)
02776 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02777 {
02778 CopyWords(reg, t.reg, reg.size());
02779 }
02780
02781 Integer::Integer(Sign s, lword value)
02782 : reg(2), sign(s)
02783 {
02784 reg[0] = word(value);
02785 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02786 }
02787
02788 Integer::Integer(signed long value)
02789 : reg(2)
02790 {
02791 if (value >= 0)
02792 sign = POSITIVE;
02793 else
02794 {
02795 sign = NEGATIVE;
02796 value = -value;
02797 }
02798 reg[0] = word(value);
02799 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02800 }
02801
02802 Integer::Integer(Sign s, word high, word low)
02803 : reg(2), sign(s)
02804 {
02805 reg[0] = low;
02806 reg[1] = high;
02807 }
02808
02809 bool Integer::IsConvertableToLong() const
02810 {
02811 if (ByteCount() > sizeof(long))
02812 return false;
02813
02814 unsigned long value = (unsigned long)reg[0];
02815 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02816
02817 if (sign==POSITIVE)
02818 return (signed long)value >= 0;
02819 else
02820 return -(signed long)value < 0;
02821 }
02822
02823 signed long Integer::ConvertToLong() const
02824 {
02825 assert(IsConvertableToLong());
02826
02827 unsigned long value = (unsigned long)reg[0];
02828 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02829 return sign==POSITIVE ? value : -(signed long)value;
02830 }
02831
02832 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02833 {
02834 Decode(encodedInteger, byteCount, s);
02835 }
02836
02837 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02838 {
02839 Decode(encodedInteger, byteCount, s);
02840 }
02841
02842 Integer::Integer(BufferedTransformation &bt)
02843 {
02844 BERDecode(bt);
02845 }
02846
02847 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02848 {
02849 Randomize(rng, bitcount);
02850 }
02851
02852 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02853 {
02854 if (!Randomize(rng, min, max, rnType, equiv, mod))
02855 throw Integer::RandomNumberNotFound();
02856 }
02857
02858 Integer Integer::Power2(size_t e)
02859 {
02860 Integer r((word)0, BitsToWords(e+1));
02861 r.SetBit(e);
02862 return r;
02863 }
02864
02865 template <long i>
02866 struct NewInteger
02867 {
02868 Integer * operator()() const
02869 {
02870 return new Integer(i);
02871 }
02872 };
02873
02874 const Integer &Integer::Zero()
02875 {
02876 return Singleton<Integer>().Ref();
02877 }
02878
02879 const Integer &Integer::One()
02880 {
02881 return Singleton<Integer, NewInteger<1> >().Ref();
02882 }
02883
02884 const Integer &Integer::Two()
02885 {
02886 return Singleton<Integer, NewInteger<2> >().Ref();
02887 }
02888
02889 bool Integer::operator!() const
02890 {
02891 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02892 }
02893
02894 Integer& Integer::operator=(const Integer& t)
02895 {
02896 if (this != &t)
02897 {
02898 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02899 reg.New(RoundupSize(t.WordCount()));
02900 CopyWords(reg, t.reg, reg.size());
02901 sign = t.sign;
02902 }
02903 return *this;
02904 }
02905
02906 bool Integer::GetBit(size_t n) const
02907 {
02908 if (n/WORD_BITS >= reg.size())
02909 return 0;
02910 else
02911 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02912 }
02913
02914 void Integer::SetBit(size_t n, bool value)
02915 {
02916 if (value)
02917 {
02918 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02919 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02920 }
02921 else
02922 {
02923 if (n/WORD_BITS < reg.size())
02924 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02925 }
02926 }
02927
02928 byte Integer::GetByte(size_t n) const
02929 {
02930 if (n/WORD_SIZE >= reg.size())
02931 return 0;
02932 else
02933 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02934 }
02935
02936 void Integer::SetByte(size_t n, byte value)
02937 {
02938 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02939 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02940 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02941 }
02942
02943 lword Integer::GetBits(size_t i, size_t n) const
02944 {
02945 lword v = 0;
02946 assert(n <= sizeof(v)*8);
02947 for (unsigned int j=0; j<n; j++)
02948 v |= lword(GetBit(i+j)) << j;
02949 return v;
02950 }
02951
02952 Integer Integer::operator-() const
02953 {
02954 Integer result(*this);
02955 result.Negate();
02956 return result;
02957 }
02958
02959 Integer Integer::AbsoluteValue() const
02960 {
02961 Integer result(*this);
02962 result.sign = POSITIVE;
02963 return result;
02964 }
02965
02966 void Integer::swap(Integer &a)
02967 {
02968 reg.swap(a.reg);
02969 std::swap(sign, a.sign);
02970 }
02971
02972 Integer::Integer(word value, size_t length)
02973 : reg(RoundupSize(length)), sign(POSITIVE)
02974 {
02975 reg[0] = value;
02976 SetWords(reg+1, 0, reg.size()-1);
02977 }
02978
02979 template <class T>
02980 static Integer StringToInteger(const T *str)
02981 {
02982 int radix;
02983
02984
02985 unsigned int length;
02986 for (length = 0; str[length] != 0; length++) {}
02987
02988 Integer v;
02989
02990 if (length == 0)
02991 return v;
02992
02993 switch (str[length-1])
02994 {
02995 case 'h':
02996 case 'H':
02997 radix=16;
02998 break;
02999 case 'o':
03000 case 'O':
03001 radix=8;
03002 break;
03003 case 'b':
03004 case 'B':
03005 radix=2;
03006 break;
03007 default:
03008 radix=10;
03009 }
03010
03011 if (length > 2 && str[0] == '0' && str[1] == 'x')
03012 radix = 16;
03013
03014 for (unsigned i=0; i<length; i++)
03015 {
03016 int digit;
03017
03018 if (str[i] >= '0' && str[i] <= '9')
03019 digit = str[i] - '0';
03020 else if (str[i] >= 'A' && str[i] <= 'F')
03021 digit = str[i] - 'A' + 10;
03022 else if (str[i] >= 'a' && str[i] <= 'f')
03023 digit = str[i] - 'a' + 10;
03024 else
03025 digit = radix;
03026
03027 if (digit < radix)
03028 {
03029 v *= radix;
03030 v += digit;
03031 }
03032 }
03033
03034 if (str[0] == '-')
03035 v.Negate();
03036
03037 return v;
03038 }
03039
03040 Integer::Integer(const char *str)
03041 : reg(2), sign(POSITIVE)
03042 {
03043 *this = StringToInteger(str);
03044 }
03045
03046 Integer::Integer(const wchar_t *str)
03047 : reg(2), sign(POSITIVE)
03048 {
03049 *this = StringToInteger(str);
03050 }
03051
03052 unsigned int Integer::WordCount() const
03053 {
03054 return (unsigned int)CountWords(reg, reg.size());
03055 }
03056
03057 unsigned int Integer::ByteCount() const
03058 {
03059 unsigned wordCount = WordCount();
03060 if (wordCount)
03061 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03062 else
03063 return 0;
03064 }
03065
03066 unsigned int Integer::BitCount() const
03067 {
03068 unsigned wordCount = WordCount();
03069 if (wordCount)
03070 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03071 else
03072 return 0;
03073 }
03074
03075 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03076 {
03077 StringStore store(input, inputLen);
03078 Decode(store, inputLen, s);
03079 }
03080
03081 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03082 {
03083 assert(bt.MaxRetrievable() >= inputLen);
03084
03085 byte b;
03086 bt.Peek(b);
03087 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03088
03089 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03090 {
03091 bt.Skip(1);
03092 inputLen--;
03093 bt.Peek(b);
03094 }
03095
03096 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03097
03098 for (size_t i=inputLen; i > 0; i--)
03099 {
03100 bt.Get(b);
03101 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03102 }
03103
03104 if (sign == NEGATIVE)
03105 {
03106 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03107 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03108 TwosComplement(reg, reg.size());
03109 }
03110 }
03111
03112 size_t Integer::MinEncodedSize(Signedness signedness) const
03113 {
03114 unsigned int outputLen = STDMAX(1U, ByteCount());
03115 if (signedness == UNSIGNED)
03116 return outputLen;
03117 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03118 outputLen++;
03119 if (IsNegative() && *this < -Power2(outputLen*8-1))
03120 outputLen++;
03121 return outputLen;
03122 }
03123
03124 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03125 {
03126 ArraySink sink(output, outputLen);
03127 Encode(sink, outputLen, signedness);
03128 }
03129
03130 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03131 {
03132 if (signedness == UNSIGNED || NotNegative())
03133 {
03134 for (size_t i=outputLen; i > 0; i--)
03135 bt.Put(GetByte(i-1));
03136 }
03137 else
03138 {
03139
03140 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03141 temp.Encode(bt, outputLen, UNSIGNED);
03142 }
03143 }
03144
03145 void Integer::DEREncode(BufferedTransformation &bt) const
03146 {
03147 DERGeneralEncoder enc(bt, INTEGER);
03148 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03149 enc.MessageEnd();
03150 }
03151
03152 void Integer::BERDecode(const byte *input, size_t len)
03153 {
03154 StringStore store(input, len);
03155 BERDecode(store);
03156 }
03157
03158 void Integer::BERDecode(BufferedTransformation &bt)
03159 {
03160 BERGeneralDecoder dec(bt, INTEGER);
03161 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03162 BERDecodeError();
03163 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03164 dec.MessageEnd();
03165 }
03166
03167 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03168 {
03169 DERGeneralEncoder enc(bt, OCTET_STRING);
03170 Encode(enc, length);
03171 enc.MessageEnd();
03172 }
03173
03174 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03175 {
03176 BERGeneralDecoder dec(bt, OCTET_STRING);
03177 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03178 BERDecodeError();
03179 Decode(dec, length);
03180 dec.MessageEnd();
03181 }
03182
03183 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03184 {
03185 ArraySink sink(output, len);
03186 return OpenPGPEncode(sink);
03187 }
03188
03189 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03190 {
03191 word16 bitCount = BitCount();
03192 bt.PutWord16(bitCount);
03193 size_t byteCount = BitsToBytes(bitCount);
03194 Encode(bt, byteCount);
03195 return 2 + byteCount;
03196 }
03197
03198 void Integer::OpenPGPDecode(const byte *input, size_t len)
03199 {
03200 StringStore store(input, len);
03201 OpenPGPDecode(store);
03202 }
03203
03204 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03205 {
03206 word16 bitCount;
03207 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03208 throw OpenPGPDecodeErr();
03209 Decode(bt, BitsToBytes(bitCount));
03210 }
03211
03212 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03213 {
03214 const size_t nbytes = nbits/8 + 1;
03215 SecByteBlock buf(nbytes);
03216 rng.GenerateBlock(buf, nbytes);
03217 if (nbytes)
03218 buf[0] = (byte)Crop(buf[0], nbits % 8);
03219 Decode(buf, nbytes, UNSIGNED);
03220 }
03221
03222 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03223 {
03224 if (min > max)
03225 throw InvalidArgument("Integer: Min must be no greater than Max");
03226
03227 Integer range = max - min;
03228 const unsigned int nbits = range.BitCount();
03229
03230 do
03231 {
03232 Randomize(rng, nbits);
03233 }
03234 while (*this > range);
03235
03236 *this += min;
03237 }
03238
03239 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03240 {
03241 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03242 }
03243
03244 class KDF2_RNG : public RandomNumberGenerator
03245 {
03246 public:
03247 KDF2_RNG(const byte *seed, size_t seedSize)
03248 : m_counter(0), m_counterAndSeed(seedSize + 4)
03249 {
03250 memcpy(m_counterAndSeed + 4, seed, seedSize);
03251 }
03252
03253 void GenerateBlock(byte *output, size_t size)
03254 {
03255 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03256 ++m_counter;
03257 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03258 }
03259
03260 private:
03261 word32 m_counter;
03262 SecByteBlock m_counterAndSeed;
03263 };
03264
03265 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03266 {
03267 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03268 Integer max;
03269 if (!params.GetValue("Max", max))
03270 {
03271 int bitLength;
03272 if (params.GetIntValue("BitLength", bitLength))
03273 max = Integer::Power2(bitLength);
03274 else
03275 throw InvalidArgument("Integer: missing Max argument");
03276 }
03277 if (min > max)
03278 throw InvalidArgument("Integer: Min must be no greater than Max");
03279
03280 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03281 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03282
03283 if (equiv.IsNegative() || equiv >= mod)
03284 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03285
03286 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03287
03288 member_ptr<KDF2_RNG> kdf2Rng;
03289 ConstByteArrayParameter seed;
03290 if (params.GetValue(Name::Seed(), seed))
03291 {
03292 ByteQueue bq;
03293 DERSequenceEncoder seq(bq);
03294 min.DEREncode(seq);
03295 max.DEREncode(seq);
03296 equiv.DEREncode(seq);
03297 mod.DEREncode(seq);
03298 DEREncodeUnsigned(seq, rnType);
03299 DEREncodeOctetString(seq, seed.begin(), seed.size());
03300 seq.MessageEnd();
03301
03302 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03303 bq.Get(finalSeed, finalSeed.size());
03304 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03305 }
03306 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03307
03308 switch (rnType)
03309 {
03310 case ANY:
03311 if (mod == One())
03312 Randomize(rng, min, max);
03313 else
03314 {
03315 Integer min1 = min + (equiv-min)%mod;
03316 if (max < min1)
03317 return false;
03318 Randomize(rng, Zero(), (max - min1) / mod);
03319 *this *= mod;
03320 *this += min1;
03321 }
03322 return true;
03323
03324 case PRIME:
03325 {
03326 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03327
03328 int i;
03329 i = 0;
03330 while (1)
03331 {
03332 if (++i==16)
03333 {
03334
03335 Integer first = min;
03336 if (FirstPrime(first, max, equiv, mod, pSelector))
03337 {
03338
03339 *this = first;
03340 if (!FirstPrime(first, max, equiv, mod, pSelector))
03341 return true;
03342 }
03343 else
03344 return false;
03345 }
03346
03347 Randomize(rng, min, max);
03348 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03349 return true;
03350 }
03351 }
03352
03353 default:
03354 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03355 }
03356 }
03357
03358 std::istream& operator>>(std::istream& in, Integer &a)
03359 {
03360 char c;
03361 unsigned int length = 0;
03362 SecBlock<char> str(length + 16);
03363
03364 std::ws(in);
03365
03366 do
03367 {
03368 in.read(&c, 1);
03369 str[length++] = c;
03370 if (length >= str.size())
03371 str.Grow(length + 16);
03372 }
03373 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03374
03375 if (in.gcount())
03376 in.putback(c);
03377 str[length-1] = '\0';
03378 a = Integer(str);
03379
03380 return in;
03381 }
03382
03383 std::ostream& operator<<(std::ostream& out, const Integer &a)
03384 {
03385
03386 long f = out.flags() & std::ios::basefield;
03387 int base, block;
03388 char suffix;
03389 switch(f)
03390 {
03391 case std::ios::oct :
03392 base = 8;
03393 block = 8;
03394 suffix = 'o';
03395 break;
03396 case std::ios::hex :
03397 base = 16;
03398 block = 4;
03399 suffix = 'h';
03400 break;
03401 default :
03402 base = 10;
03403 block = 3;
03404 suffix = '.';
03405 }
03406
03407 Integer temp1=a, temp2;
03408
03409 if (a.IsNegative())
03410 {
03411 out << '-';
03412 temp1.Negate();
03413 }
03414
03415 if (!a)
03416 out << '0';
03417
03418 static const char upper[]="0123456789ABCDEF";
03419 static const char lower[]="0123456789abcdef";
03420
03421 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
03422 unsigned i=0;
03423 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03424
03425 while (!!temp1)
03426 {
03427 word digit;
03428 Integer::Divide(digit, temp2, temp1, base);
03429 s[i++]=vec[digit];
03430 temp1.swap(temp2);
03431 }
03432
03433 while (i--)
03434 {
03435 out << s[i];
03436
03437
03438 }
03439 return out << suffix;
03440 }
03441
03442 Integer& Integer::operator++()
03443 {
03444 if (NotNegative())
03445 {
03446 if (Increment(reg, reg.size()))
03447 {
03448 reg.CleanGrow(2*reg.size());
03449 reg[reg.size()/2]=1;
03450 }
03451 }
03452 else
03453 {
03454 word borrow = Decrement(reg, reg.size());
03455 assert(!borrow);
03456 if (WordCount()==0)
03457 *this = Zero();
03458 }
03459 return *this;
03460 }
03461
03462 Integer& Integer::operator--()
03463 {
03464 if (IsNegative())
03465 {
03466 if (Increment(reg, reg.size()))
03467 {
03468 reg.CleanGrow(2*reg.size());
03469 reg[reg.size()/2]=1;
03470 }
03471 }
03472 else
03473 {
03474 if (Decrement(reg, reg.size()))
03475 *this = -One();
03476 }
03477 return *this;
03478 }
03479
03480 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03481 {
03482 int carry;
03483 if (a.reg.size() == b.reg.size())
03484 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03485 else if (a.reg.size() > b.reg.size())
03486 {
03487 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03488 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03489 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03490 }
03491 else
03492 {
03493 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03494 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03495 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03496 }
03497
03498 if (carry)
03499 {
03500 sum.reg.CleanGrow(2*sum.reg.size());
03501 sum.reg[sum.reg.size()/2] = 1;
03502 }
03503 sum.sign = Integer::POSITIVE;
03504 }
03505
03506 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03507 {
03508 unsigned aSize = a.WordCount();
03509 aSize += aSize%2;
03510 unsigned bSize = b.WordCount();
03511 bSize += bSize%2;
03512
03513 if (aSize == bSize)
03514 {
03515 if (Compare(a.reg, b.reg, aSize) >= 0)
03516 {
03517 Subtract(diff.reg, a.reg, b.reg, aSize);
03518 diff.sign = Integer::POSITIVE;
03519 }
03520 else
03521 {
03522 Subtract(diff.reg, b.reg, a.reg, aSize);
03523 diff.sign = Integer::NEGATIVE;
03524 }
03525 }
03526 else if (aSize > bSize)
03527 {
03528 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03529 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03530 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03531 assert(!borrow);
03532 diff.sign = Integer::POSITIVE;
03533 }
03534 else
03535 {
03536 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03537 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03538 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03539 assert(!borrow);
03540 diff.sign = Integer::NEGATIVE;
03541 }
03542 }
03543
03544
03545 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03546 {
03547 return a < b ? b : a;
03548 }
03549
03550 Integer Integer::Plus(const Integer& b) const
03551 {
03552 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03553 if (NotNegative())
03554 {
03555 if (b.NotNegative())
03556 PositiveAdd(sum, *this, b);
03557 else
03558 PositiveSubtract(sum, *this, b);
03559 }
03560 else
03561 {
03562 if (b.NotNegative())
03563 PositiveSubtract(sum, b, *this);
03564 else
03565 {
03566 PositiveAdd(sum, *this, b);
03567 sum.sign = Integer::NEGATIVE;
03568 }
03569 }
03570 return sum;
03571 }
03572
03573 Integer& Integer::operator+=(const Integer& t)
03574 {
03575 reg.CleanGrow(t.reg.size());
03576 if (NotNegative())
03577 {
03578 if (t.NotNegative())
03579 PositiveAdd(*this, *this, t);
03580 else
03581 PositiveSubtract(*this, *this, t);
03582 }
03583 else
03584 {
03585 if (t.NotNegative())
03586 PositiveSubtract(*this, t, *this);
03587 else
03588 {
03589 PositiveAdd(*this, *this, t);
03590 sign = Integer::NEGATIVE;
03591 }
03592 }
03593 return *this;
03594 }
03595
03596 Integer Integer::Minus(const Integer& b) const
03597 {
03598 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03599 if (NotNegative())
03600 {
03601 if (b.NotNegative())
03602 PositiveSubtract(diff, *this, b);
03603 else
03604 PositiveAdd(diff, *this, b);
03605 }
03606 else
03607 {
03608 if (b.NotNegative())
03609 {
03610 PositiveAdd(diff, *this, b);
03611 diff.sign = Integer::NEGATIVE;
03612 }
03613 else
03614 PositiveSubtract(diff, b, *this);
03615 }
03616 return diff;
03617 }
03618
03619 Integer& Integer::operator-=(const Integer& t)
03620 {
03621 reg.CleanGrow(t.reg.size());
03622 if (NotNegative())
03623 {
03624 if (t.NotNegative())
03625 PositiveSubtract(*this, *this, t);
03626 else
03627 PositiveAdd(*this, *this, t);
03628 }
03629 else
03630 {
03631 if (t.NotNegative())
03632 {
03633 PositiveAdd(*this, *this, t);
03634 sign = Integer::NEGATIVE;
03635 }
03636 else
03637 PositiveSubtract(*this, t, *this);
03638 }
03639 return *this;
03640 }
03641
03642 Integer& Integer::operator<<=(size_t n)
03643 {
03644 const size_t wordCount = WordCount();
03645 const size_t shiftWords = n / WORD_BITS;
03646 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03647
03648 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03649 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03650 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03651 return *this;
03652 }
03653
03654 Integer& Integer::operator>>=(size_t n)
03655 {
03656 const size_t wordCount = WordCount();
03657 const size_t shiftWords = n / WORD_BITS;
03658 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03659
03660 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03661 if (wordCount > shiftWords)
03662 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03663 if (IsNegative() && WordCount()==0)
03664 *this = Zero();
03665 return *this;
03666 }
03667
03668 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03669 {
03670 size_t aSize = RoundupSize(a.WordCount());
03671 size_t bSize = RoundupSize(b.WordCount());
03672
03673 product.reg.CleanNew(RoundupSize(aSize+bSize));
03674 product.sign = Integer::POSITIVE;
03675
03676 IntegerSecBlock workspace(aSize + bSize);
03677 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03678 }
03679
03680 void Multiply(Integer &product, const Integer &a, const Integer &b)
03681 {
03682 PositiveMultiply(product, a, b);
03683
03684 if (a.NotNegative() != b.NotNegative())
03685 product.Negate();
03686 }
03687
03688 Integer Integer::Times(const Integer &b) const
03689 {
03690 Integer product;
03691 Multiply(product, *this, b);
03692 return product;
03693 }
03694
03695
03696
03697
03698
03699
03700
03701
03702
03703
03704
03705
03706
03707
03708
03709
03710
03711
03712
03713
03714
03715
03716
03717 void PositiveDivide(Integer &remainder, Integer "ient,
03718 const Integer &a, const Integer &b)
03719 {
03720 unsigned aSize = a.WordCount();
03721 unsigned bSize = b.WordCount();
03722
03723 if (!bSize)
03724 throw Integer::DivideByZero();
03725
03726 if (aSize < bSize)
03727 {
03728 remainder = a;
03729 remainder.sign = Integer::POSITIVE;
03730 quotient = Integer::Zero();
03731 return;
03732 }
03733
03734 aSize += aSize%2;
03735 bSize += bSize%2;
03736
03737 remainder.reg.CleanNew(RoundupSize(bSize));
03738 remainder.sign = Integer::POSITIVE;
03739 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03740 quotient.sign = Integer::POSITIVE;
03741
03742 IntegerSecBlock T(aSize+3*(bSize+2));
03743 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03744 }
03745
03746 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03747 {
03748 PositiveDivide(remainder, quotient, dividend, divisor);
03749
03750 if (dividend.IsNegative())
03751 {
03752 quotient.Negate();
03753 if (remainder.NotZero())
03754 {
03755 --quotient;
03756 remainder = divisor.AbsoluteValue() - remainder;
03757 }
03758 }
03759
03760 if (divisor.IsNegative())
03761 quotient.Negate();
03762 }
03763
03764 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03765 {
03766 q = a;
03767 q >>= n;
03768
03769 const size_t wordCount = BitsToWords(n);
03770 if (wordCount <= a.WordCount())
03771 {
03772 r.reg.resize(RoundupSize(wordCount));
03773 CopyWords(r.reg, a.reg, wordCount);
03774 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03775 if (n % WORD_BITS != 0)
03776 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03777 }
03778 else
03779 {
03780 r.reg.resize(RoundupSize(a.WordCount()));
03781 CopyWords(r.reg, a.reg, r.reg.size());
03782 }
03783 r.sign = POSITIVE;
03784
03785 if (a.IsNegative() && r.NotZero())
03786 {
03787 --q;
03788 r = Power2(n) - r;
03789 }
03790 }
03791
03792 Integer Integer::DividedBy(const Integer &b) const
03793 {
03794 Integer remainder, quotient;
03795 Integer::Divide(remainder, quotient, *this, b);
03796 return quotient;
03797 }
03798
03799 Integer Integer::Modulo(const Integer &b) const
03800 {
03801 Integer remainder, quotient;
03802 Integer::Divide(remainder, quotient, *this, b);
03803 return remainder;
03804 }
03805
03806 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03807 {
03808 if (!divisor)
03809 throw Integer::DivideByZero();
03810
03811 assert(divisor);
03812
03813 if ((divisor & (divisor-1)) == 0)
03814 {
03815 quotient = dividend >> (BitPrecision(divisor)-1);
03816 remainder = dividend.reg[0] & (divisor-1);
03817 return;
03818 }
03819
03820 unsigned int i = dividend.WordCount();
03821 quotient.reg.CleanNew(RoundupSize(i));
03822 remainder = 0;
03823 while (i--)
03824 {
03825 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03826 remainder = DWord(dividend.reg[i], remainder) % divisor;
03827 }
03828
03829 if (dividend.NotNegative())
03830 quotient.sign = POSITIVE;
03831 else
03832 {
03833 quotient.sign = NEGATIVE;
03834 if (remainder)
03835 {
03836 --quotient;
03837 remainder = divisor - remainder;
03838 }
03839 }
03840 }
03841
03842 Integer Integer::DividedBy(word b) const
03843 {
03844 word remainder;
03845 Integer quotient;
03846 Integer::Divide(remainder, quotient, *this, b);
03847 return quotient;
03848 }
03849
03850 word Integer::Modulo(word divisor) const
03851 {
03852 if (!divisor)
03853 throw Integer::DivideByZero();
03854
03855 assert(divisor);
03856
03857 word remainder;
03858
03859 if ((divisor & (divisor-1)) == 0)
03860 remainder = reg[0] & (divisor-1);
03861 else
03862 {
03863 unsigned int i = WordCount();
03864
03865 if (divisor <= 5)
03866 {
03867 DWord sum(0, 0);
03868 while (i--)
03869 sum += reg[i];
03870 remainder = sum % divisor;
03871 }
03872 else
03873 {
03874 remainder = 0;
03875 while (i--)
03876 remainder = DWord(reg[i], remainder) % divisor;
03877 }
03878 }
03879
03880 if (IsNegative() && remainder)
03881 remainder = divisor - remainder;
03882
03883 return remainder;
03884 }
03885
03886 void Integer::Negate()
03887 {
03888 if (!!(*this))
03889 sign = Sign(1-sign);
03890 }
03891
03892 int Integer::PositiveCompare(const Integer& t) const
03893 {
03894 unsigned size = WordCount(), tSize = t.WordCount();
03895
03896 if (size == tSize)
03897 return CryptoPP::Compare(reg, t.reg, size);
03898 else
03899 return size > tSize ? 1 : -1;
03900 }
03901
03902 int Integer::Compare(const Integer& t) const
03903 {
03904 if (NotNegative())
03905 {
03906 if (t.NotNegative())
03907 return PositiveCompare(t);
03908 else
03909 return 1;
03910 }
03911 else
03912 {
03913 if (t.NotNegative())
03914 return -1;
03915 else
03916 return -PositiveCompare(t);
03917 }
03918 }
03919
03920 Integer Integer::SquareRoot() const
03921 {
03922 if (!IsPositive())
03923 return Zero();
03924
03925
03926 Integer x, y = Power2((BitCount()+1)/2);
03927 assert(y*y >= *this);
03928
03929 do
03930 {
03931 x = y;
03932 y = (x + *this/x) >> 1;
03933 } while (y<x);
03934
03935 return x;
03936 }
03937
03938 bool Integer::IsSquare() const
03939 {
03940 Integer r = SquareRoot();
03941 return *this == r.Squared();
03942 }
03943
03944 bool Integer::IsUnit() const
03945 {
03946 return (WordCount() == 1) && (reg[0] == 1);
03947 }
03948
03949 Integer Integer::MultiplicativeInverse() const
03950 {
03951 return IsUnit() ? *this : Zero();
03952 }
03953
03954 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03955 {
03956 return x*y%m;
03957 }
03958
03959 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03960 {
03961 ModularArithmetic mr(m);
03962 return mr.Exponentiate(x, e);
03963 }
03964
03965 Integer Integer::Gcd(const Integer &a, const Integer &b)
03966 {
03967 return EuclideanDomainOf<Integer>().Gcd(a, b);
03968 }
03969
03970 Integer Integer::InverseMod(const Integer &m) const
03971 {
03972 assert(m.NotNegative());
03973
03974 if (IsNegative())
03975 return Modulo(m).InverseMod(m);
03976
03977 if (m.IsEven())
03978 {
03979 if (!m || IsEven())
03980 return Zero();
03981 if (*this == One())
03982 return One();
03983
03984 Integer u = m.Modulo(*this).InverseMod(*this);
03985 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03986 }
03987
03988 SecBlock<word> T(m.reg.size() * 4);
03989 Integer r((word)0, m.reg.size());
03990 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03991 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03992 return r;
03993 }
03994
03995 word Integer::InverseMod(word mod) const
03996 {
03997 word g0 = mod, g1 = *this % mod;
03998 word v0 = 0, v1 = 1;
03999 word y;
04000
04001 while (g1)
04002 {
04003 if (g1 == 1)
04004 return v1;
04005 y = g0 / g1;
04006 g0 = g0 % g1;
04007 v0 += y * v1;
04008
04009 if (!g0)
04010 break;
04011 if (g0 == 1)
04012 return mod-v0;
04013 y = g1 / g0;
04014 g1 = g1 % g0;
04015 v1 += y * v0;
04016 }
04017 return 0;
04018 }
04019
04020
04021
04022 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04023 {
04024 BERSequenceDecoder seq(bt);
04025 OID oid(seq);
04026 if (oid != ASN1::prime_field())
04027 BERDecodeError();
04028 m_modulus.BERDecode(seq);
04029 seq.MessageEnd();
04030 m_result.reg.resize(m_modulus.reg.size());
04031 }
04032
04033 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04034 {
04035 DERSequenceEncoder seq(bt);
04036 ASN1::prime_field().DEREncode(seq);
04037 m_modulus.DEREncode(seq);
04038 seq.MessageEnd();
04039 }
04040
04041 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04042 {
04043 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04044 }
04045
04046 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04047 {
04048 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04049 }
04050
04051 const Integer& ModularArithmetic::Half(const Integer &a) const
04052 {
04053 if (a.reg.size()==m_modulus.reg.size())
04054 {
04055 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04056 return m_result;
04057 }
04058 else
04059 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04060 }
04061
04062 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04063 {
04064 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04065 {
04066 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04067 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04068 {
04069 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04070 }
04071 return m_result;
04072 }
04073 else
04074 {
04075 m_result1 = a+b;
04076 if (m_result1 >= m_modulus)
04077 m_result1 -= m_modulus;
04078 return m_result1;
04079 }
04080 }
04081
04082 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04083 {
04084 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04085 {
04086 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04087 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04088 {
04089 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04090 }
04091 }
04092 else
04093 {
04094 a+=b;
04095 if (a>=m_modulus)
04096 a-=m_modulus;
04097 }
04098
04099 return a;
04100 }
04101
04102 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04103 {
04104 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04105 {
04106 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04107 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04108 return m_result;
04109 }
04110 else
04111 {
04112 m_result1 = a-b;
04113 if (m_result1.IsNegative())
04114 m_result1 += m_modulus;
04115 return m_result1;
04116 }
04117 }
04118
04119 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04120 {
04121 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04122 {
04123 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04124 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04125 }
04126 else
04127 {
04128 a-=b;
04129 if (a.IsNegative())
04130 a+=m_modulus;
04131 }
04132
04133 return a;
04134 }
04135
04136 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04137 {
04138 if (!a)
04139 return a;
04140
04141 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04142 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04143 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04144
04145 return m_result;
04146 }
04147
04148 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04149 {
04150 if (m_modulus.IsOdd())
04151 {
04152 MontgomeryRepresentation dr(m_modulus);
04153 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04154 }
04155 else
04156 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04157 }
04158
04159 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04160 {
04161 if (m_modulus.IsOdd())
04162 {
04163 MontgomeryRepresentation dr(m_modulus);
04164 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04165 for (unsigned int i=0; i<exponentsCount; i++)
04166 results[i] = dr.ConvertOut(results[i]);
04167 }
04168 else
04169 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04170 }
04171
04172 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04173 : ModularArithmetic(m),
04174 m_u((word)0, m_modulus.reg.size()),
04175 m_workspace(5*m_modulus.reg.size())
04176 {
04177 if (!m_modulus.IsOdd())
04178 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04179
04180 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04181 }
04182
04183 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04184 {
04185 word *const T = m_workspace.begin();
04186 word *const R = m_result.reg.begin();
04187 const size_t N = m_modulus.reg.size();
04188 assert(a.reg.size()<=N && b.reg.size()<=N);
04189
04190 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04191 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04192 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04193 return m_result;
04194 }
04195
04196 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04197 {
04198 word *const T = m_workspace.begin();
04199 word *const R = m_result.reg.begin();
04200 const size_t N = m_modulus.reg.size();
04201 assert(a.reg.size()<=N);
04202
04203 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04204 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04205 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04206 return m_result;
04207 }
04208
04209 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04210 {
04211 word *const T = m_workspace.begin();
04212 word *const R = m_result.reg.begin();
04213 const size_t N = m_modulus.reg.size();
04214 assert(a.reg.size()<=N);
04215
04216 CopyWords(T, a.reg, a.reg.size());
04217 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04218 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04219 return m_result;
04220 }
04221
04222 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04223 {
04224
04225 word *const T = m_workspace.begin();
04226 word *const R = m_result.reg.begin();
04227 const size_t N = m_modulus.reg.size();
04228 assert(a.reg.size()<=N);
04229
04230 CopyWords(T, a.reg, a.reg.size());
04231 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04232 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04233 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04234
04235
04236
04237 if (k>N*WORD_BITS)
04238 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04239 else
04240 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04241
04242 return m_result;
04243 }
04244
04245 NAMESPACE_END
04246
04247 #endif