Crypto++
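A minimal usage sketch (not part of vmac.cpp), assuming the VMAC<AES> template declared in vmac.h and the standard Crypto++ MAC interface; the all-zero key and nonce below are placeholders for illustration only.

// vmac_example.cpp - hedged usage sketch, not part of the library
#include "vmac.h"
#include "aes.h"
#include "filters.h"
#include "hex.h"
#include <iostream>
#include <string>

int main()
{
	using namespace CryptoPP;

	byte key[AES::DEFAULT_KEYLENGTH] = {0};	// 16-byte AES key (placeholder only)
	byte nonce[16] = {0};	// top bit of nonce[0] must be clear (see VMAC_Base::GetNextIV below)

	VMAC<AES> mac;	// 128-bit tag by default; VMAC<AES, 64> yields 64-bit tags
	mac.SetKeyWithIV(key, sizeof(key), nonce);

	const byte msg[] = {'a', 'b', 'c'};
	byte tag[16];
	mac.CalculateDigest(tag, msg, sizeof(msg));

	std::string hex;
	StringSource ss(tag, sizeof(tag), true, new HexEncoder(new StringSink(hex)));
	std::cout << "VMAC(AES) tag: " << hex << std::endl;
	return 0;
}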
// vmac.cpp - written and placed in the public domain by Wei Dai
// based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt

#include "pch.h"
#include "vmac.h"
#include "argnames.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

#if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
#include <intrin.h>
#endif

#define VMAC_BOOL_WORD128 (defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE))
#ifdef __BORLANDC__
#define const	// Turbo C++ 2006 workaround
#endif
static const word64 p64   = W64LIT(0xfffffffffffffeff);	/* 2^64 - 257 prime */
static const word64 m62   = W64LIT(0x3fffffffffffffff);	/* 62-bit mask */
static const word64 m63   = W64LIT(0x7fffffffffffffff);	/* 63-bit mask */
static const word64 m64   = W64LIT(0xffffffffffffffff);	/* 64-bit mask */
static const word64 mpoly = W64LIT(0x1fffffff1fffffff);	/* Poly key mask */
#ifdef __BORLANDC__
#undef const
#endif
#if VMAC_BOOL_WORD128
#ifdef __powerpc__
// workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
#define m126 ((word128(m62)<<64)|m64)
#else
static const word128 m126 = (word128(m62)<<64)|m64;	/* 126-bit mask */
#endif
#endif

void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
{
	int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
	if (digestLength != 8 && digestLength != 16)
		throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
	m_is128 = digestLength == 16;

	m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
	if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
		throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");

	AllocateBlocks();

	BlockCipher &cipher = AccessCipher();
	cipher.SetKey(userKey, keylength, params);
	unsigned int blockSize = cipher.BlockSize();
	unsigned int blockSizeInWords = blockSize / sizeof(word64);
	SecBlock<word64> out(blockSizeInWords);
	SecByteBlock in;
	in.CleanNew(blockSize);
	size_t i;

	/* Fill nh key */
	in[0] = 0x80;
	cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
	ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));

	/* Fill poly key */
	in[0] = 0xC0;
	in[15] = 0;
	for (i = 0; i <= (size_t)m_is128; i++)
	{
		cipher.ProcessBlock(in, out.BytePtr());
		m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
		m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
		in[15]++;
	}

	/* Fill ip key */
	in[0] = 0xE0;
	in[15] = 0;
	word64 *l3Key = m_l3Key();
	for (i = 0; i <= (size_t)m_is128; i++)
		do
		{
			cipher.ProcessBlock(in, out.BytePtr());
			l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
			l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
			in[15]++;
		} while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));

	m_padCached = false;
	size_t nonceLength;
	const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
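	// Keying material is now derived; prime the pad cache by resynchronizing
	// with the nonce supplied alongside the key.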
	Resynchronize(nonce, (int)nonceLength);
}

void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
{
	SimpleKeyingInterface::GetNextIV(rng, IV);
	IV[0] &= 0x7f;
}

void VMAC_Base::Resynchronize(const byte *nonce, int len)
{
	size_t length = ThrowIfInvalidIVLength(len);
	size_t s = IVSize();
	byte *storedNonce = m_nonce();

	if (m_is128)
	{
		memset(storedNonce, 0, s-length);
		memcpy(storedNonce+s-length, nonce, length);
		AccessCipher().ProcessBlock(storedNonce, m_pad());
	}
	else
	{
		if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
		{
			m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
			for (size_t i=0; m_padCached && i<s-length; i++)
				m_padCached = (storedNonce[i] == 0);
		}
		if (!m_padCached)
		{
			memset(storedNonce, 0, s-length);
			memcpy(storedNonce+s-length, nonce, length-1);
			storedNonce[s-1] = nonce[length-1] & 0xfe;
			AccessCipher().ProcessBlock(storedNonce, m_pad());
			m_padCached = true;
		}
		storedNonce[s-1] = nonce[length-1];
	}
	m_isFirstBlock = true;
	Restart();
}

void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
{
	assert(false);
	throw 0;
}

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
#pragma warning(disable: 4731)	// frame pointer register 'ebp' modified by inline assembly code
void
#ifdef __GNUC__
__attribute__ ((noinline))	// Intel Compiler 9.1 workaround
#endif
VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
	const word64 *nhK = m_nhKey();
	word64 *polyS = m_polyState();
	word32 L1KeyLength = m_L1KeyLength;

#ifdef __GNUC__
	word32 temp;
	__asm__ __volatile__
	(
	AS2(	mov %%ebx, %0)
	AS2(	mov %1, %%ebx)
	".intel_syntax noprefix;"
#else
	#if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
	char isFirstBlock = m_isFirstBlock;
	AS2(	mov ebx, [L1KeyLength])
	AS2(	mov dl, [isFirstBlock])
	#else
	AS2(	mov ecx, this)
	AS2(	mov ebx, [ecx+m_L1KeyLength])
	AS2(	mov dl, [ecx+m_isFirstBlock])
	#endif
	AS2(	mov eax, tagPart)
	AS2(	shl eax, 4)
	AS2(	mov edi, nhK)
	AS2(	add edi, eax)
	AS2(	add eax, eax)
	AS2(	add eax, polyS)

	AS2(	mov esi, data)
	AS2(	mov ecx, blocksRemainingInWord64)
#endif

	AS2(	shr ebx, 3)
	AS1(	push ebp)
	AS2(	sub esp, 12)
	ASL(4)
	AS2(	mov ebp, ebx)
	AS2(	cmp ecx, ebx)
	AS2(	cmovl ebp, ecx)
	AS2(	sub ecx, ebp)
	AS2(	lea ebp, [edi+8*ebp])	// end of nhK
	AS2(	movq mm6, [esi])
	AS2(	paddq mm6, [edi])
	AS2(	movq mm5, [esi+8])
	AS2(	paddq mm5, [edi+8])
	AS2(	add esi, 16)
	AS2(	add edi, 16)
	AS2(	movq mm4, mm6)
	ASS(	pshufw mm2, mm6, 1, 0, 3, 2)
	AS2(	pmuludq mm6, mm5)
	ASS(	pshufw mm3, mm5, 1, 0, 3, 2)
	AS2(	pmuludq mm5, mm2)
	AS2(	pmuludq mm2, mm3)
	AS2(	pmuludq mm3, mm4)
	AS2(	pxor mm7, mm7)
	AS2(	movd [esp], mm6)
	AS2(	psrlq mm6, 32)
	AS2(	movd [esp+4], mm5)
	AS2(	psrlq mm5, 32)
	AS2(	cmp edi, ebp)
	ASJ(	je, 1, f)
	ASL(0)
	AS2(	movq mm0, [esi])
	AS2(	paddq mm0, [edi])
	AS2(	movq mm1, [esi+8])
	AS2(	paddq mm1, [edi+8])
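	// NH inner loop: mm0/mm1 now hold the next (data word + key word) pair; the pshufw/pmuludq
	// sequence below builds their 64x64-bit product from 32x32-bit partial products and
	// accumulates it into the running sum kept in mm5..mm7 and the [esp] spill slots.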
	AS2(	add esi, 16)
	AS2(	add edi, 16)
	AS2(	movq mm4, mm0)
	AS2(	paddq mm5, mm2)
	ASS(	pshufw mm2, mm0, 1, 0, 3, 2)
	AS2(	pmuludq mm0, mm1)
	AS2(	movd [esp+8], mm3)
	AS2(	psrlq mm3, 32)
	AS2(	paddq mm5, mm3)
	ASS(	pshufw mm3, mm1, 1, 0, 3, 2)
	AS2(	pmuludq mm1, mm2)
	AS2(	pmuludq mm2, mm3)
	AS2(	pmuludq mm3, mm4)
	AS2(	movd mm4, [esp])
	AS2(	paddq mm7, mm4)
	AS2(	movd mm4, [esp+4])
	AS2(	paddq mm6, mm4)
	AS2(	movd mm4, [esp+8])
	AS2(	paddq mm6, mm4)
	AS2(	movd [esp], mm0)
	AS2(	psrlq mm0, 32)
	AS2(	paddq mm6, mm0)
	AS2(	movd [esp+4], mm1)
	AS2(	psrlq mm1, 32)
	AS2(	paddq mm5, mm1)
	AS2(	cmp edi, ebp)
	ASJ(	jne, 0, b)
	ASL(1)
	AS2(	paddq mm5, mm2)
	AS2(	movd [esp+8], mm3)
	AS2(	psrlq mm3, 32)
	AS2(	paddq mm5, mm3)
	AS2(	movd mm4, [esp])
	AS2(	paddq mm7, mm4)
	AS2(	movd mm4, [esp+4])
	AS2(	paddq mm6, mm4)
	AS2(	movd mm4, [esp+8])
	AS2(	paddq mm6, mm4)
	AS2(	lea ebp, [8*ebx])
	AS2(	sub edi, ebp)	// reset edi to start of nhK

	AS2(	movd [esp], mm7)
	AS2(	psrlq mm7, 32)
	AS2(	paddq mm6, mm7)
	AS2(	movd [esp+4], mm6)
	AS2(	psrlq mm6, 32)
	AS2(	paddq mm5, mm6)
	AS2(	psllq mm5, 2)
	AS2(	psrlq mm5, 2)

#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]
	AS2(	test dl, dl)
	ASJ(	jz, 2, f)
	AS2(	movd mm1, k0)
	AS2(	movd mm0, [esp])
	AS2(	paddq mm0, mm1)
	AS2(	movd a0, mm0)
	AS2(	psrlq mm0, 32)
	AS2(	movd mm1, k1)
	AS2(	movd mm2, [esp+4])
	AS2(	paddq mm1, mm2)
	AS2(	paddq mm0, mm1)
	AS2(	movd a1, mm0)
	AS2(	psrlq mm0, 32)
	AS2(	paddq mm5, k2)
	AS2(	paddq mm0, mm5)
	AS2(	movq a2, mm0)
	AS2(	xor edx, edx)
	ASJ(	jmp, 3, f)
	ASL(2)
	AS2(	movd mm0, a3)
	AS2(	movq mm4, mm0)
	AS2(	pmuludq mm0, k3)	// a3*k3
	AS2(	movd mm1, a0)
	AS2(	pmuludq mm1, k2)	// a0*k2
	AS2(	movd mm2, a1)
	AS2(	movd mm6, k1)
	AS2(	pmuludq mm2, mm6)	// a1*k1
	AS2(	movd mm3, a2)
	AS2(	psllq mm0, 1)
	AS2(	paddq mm0, mm5)
	AS2(	movq mm5, mm3)
	AS2(	movd mm7, k0)
	AS2(	pmuludq mm3, mm7)	// a2*k0
	AS2(	pmuludq mm4, mm7)	// a3*k0
	AS2(	pmuludq mm5, mm6)	// a2*k1
	AS2(	paddq mm0, mm1)
	AS2(	movd mm1, a1)
	AS2(	paddq mm4, mm5)
	AS2(	movq mm5, mm1)
	AS2(	pmuludq mm1, k2)	// a1*k2
	AS2(	paddq mm0, mm2)
	AS2(	movd mm2, a0)
	AS2(	paddq mm0, mm3)
	AS2(	movq mm3, mm2)
	AS2(	pmuludq mm2, k3)	// a0*k3
	AS2(	pmuludq mm3, mm7)	// a0*k0
	AS2(	movd [esp+8], mm0)
	AS2(	psrlq mm0, 32)
	AS2(	pmuludq mm7, mm5)	// a1*k0
	AS2(	pmuludq mm5, k3)	// a1*k3
	AS2(	paddq mm0, mm1)
	AS2(	movd mm1, a2)
	AS2(	pmuludq mm1, k2)	// a2*k2
	AS2(	paddq mm0, mm2)
	AS2(	paddq mm0, mm4)
	AS2(	movq mm4, mm0)
	AS2(	movd mm2, a3)
	AS2(	pmuludq mm2, mm6)	// a3*k1
	AS2(	pmuludq mm6, a0)	// a0*k1
	AS2(	psrlq mm0, 31)
	AS2(	paddq mm0, mm3)
	AS2(	movd mm3, [esp])
	AS2(	paddq mm0, mm3)
	AS2(	movd mm3, a2)
	AS2(	pmuludq mm3, k3)	// a2*k3
	AS2(	paddq mm5, mm1)
	AS2(	movd mm1, a3)
	AS2(	pmuludq mm1, k2)	// a3*k2
	AS2(	paddq mm5, mm2)
	AS2(	movd mm2, [esp+4])
	AS2(	psllq mm5, 1)
	AS2(	paddq mm0, mm5)
	AS2(	psllq mm4, 33)
	AS2(	movd a0, mm0)
	AS2(	psrlq mm0, 32)
	AS2(	paddq mm6, mm7)
	AS2(	movd mm7, [esp+8])
	AS2(	paddq mm0, mm6)
	AS2(	paddq mm0, mm2)
	AS2(	paddq mm3, mm1)
	AS2(	psllq mm3, 1)
	AS2(	paddq mm0, mm3)
	AS2(	psrlq mm4, 1)
	AS2(	movd a1, mm0)
	AS2(	psrlq mm0, 32)
	AS2(	por mm4, mm7)
	AS2(	paddq mm0, mm4)
	AS2(	movq a2, mm0)
#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

	ASL(3)
	AS2(	test ecx, ecx)
	ASJ(	jnz, 4, b)

	AS2(	add esp, 12)
	AS1(	pop ebp)
	AS1(	emms)
#ifdef __GNUC__
	".att_syntax prefix;"
	AS2(	mov %0, %%ebx)
	: "=m" (temp)
	: "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
	: "memory", "cc"
	);
#endif
}
#endif

#if VMAC_BOOL_WORD128
	#define DeclareNH(a) word128 a=0
	#define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
	#define AccumulateNH(a, b, c) a += word128(b)*(c)
	#define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
	#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
		#define MUL32(a, b) __emulu(word32(a), word32(b))
	#else
		#define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
	#endif
	#if defined(CRYPTOPP_X64_ASM_AVAILABLE)
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
		#define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
		#define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
	#elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
		#define AccumulateNH(a, b, c) {\
			word64 ph, pl;\
			pl = _umul128(b,c,&ph);\
			a##0 += pl;\
			a##1 += ph + (a##0 < pl);}
	#else
		#define VMAC_BOOL_32BIT 1
		#define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
		#define MUL64(rh,rl,i1,i2) \
			{	word64 _i1 = (i1), _i2 = (i2); \
				word64 m1= MUL32(_i1,_i2>>32); \
				word64 m2= MUL32(_i1>>32,_i2); \
				rh = MUL32(_i1>>32,_i2>>32); \
				rl = MUL32(_i1,_i2); \
				ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
				ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
			}
		#define AccumulateNH(a, b, c) {\
			word64 p = MUL32(b, c);\
			a##1 += word32((p)>>32);\
			a##0 += word32(p);\
			p = MUL32((b)>>32, c);\
			a##2 += word32((p)>>32);\
			a##1 += word32(p);\
			p = MUL32((b)>>32, (c)>>32);\
			a##2 += p;\
			p = MUL32(b, (c)>>32);\
			a##1 += word32(p);\
			a##2 += word32(p>>32);}
	#endif
#endif
#ifndef VMAC_BOOL_32BIT
	#define VMAC_BOOL_32BIT 0
#endif
#ifndef ADD128
	#define ADD128(rh,rl,ih,il) \
		{	word64 _il = (il); \
			(rl) += (_il); \
			(rh) += (ih) + ((rl) < (_il)); \
		}
#endif
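// Portable VHASH implementation: NH hashes the message in L1KeyLength-byte chunks, and each chunk
// result is absorbed into a polynomial hash evaluated modulo 2^127-1 (see draft-krovetz-vmac-01).
// The arithmetic is selected at compile time: native 128-bit words (VMAC_BOOL_WORD128), 64-bit
// words with explicit carries, or the pure 32-bit fallback (VMAC_BOOL_32BIT).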
#if !(defined(_MSC_VER) && _MSC_VER < 1300)
template <bool T_128BitTag>
#endif
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
	#define INNER_LOOP_ITERATION(j) {\
		word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
		word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
		AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
		if (T_128BitTag)\
			AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
		}

#if (defined(_MSC_VER) && _MSC_VER < 1300)
	bool T_128BitTag = m_is128;
#endif
	size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
	size_t innerLoopEnd = L1KeyLengthInWord64;
	const word64 *nhK = m_nhKey();
	word64 *polyS = m_polyState();
	bool isFirstBlock = true;
	size_t i;

#if !VMAC_BOOL_32BIT
	#if VMAC_BOOL_WORD128
		word128 a1, a2;
	#else
		word64 ah1, al1, ah2, al2;
	#endif
	word64 kh1, kl1, kh2, kl2;
	kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
	if (T_128BitTag)
	{
		kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
	}
#endif

	do
	{
		DeclareNH(nhA);
		DeclareNH(nhB);

		i = 0;
		if (blocksRemainingInWord64 < L1KeyLengthInWord64)
		{
			if (blocksRemainingInWord64 % 8)
			{
				innerLoopEnd = blocksRemainingInWord64 % 8;
				for (; i<innerLoopEnd; i+=2)
					INNER_LOOP_ITERATION(0);
			}
			innerLoopEnd = blocksRemainingInWord64;
		}
		for (; i<innerLoopEnd; i+=8)
		{
			INNER_LOOP_ITERATION(0);
			INNER_LOOP_ITERATION(1);
			INNER_LOOP_ITERATION(2);
			INNER_LOOP_ITERATION(3);
		}
		blocksRemainingInWord64 -= innerLoopEnd;
		data += innerLoopEnd;

#if VMAC_BOOL_32BIT
		word32 nh0[2], nh1[2];
		word64 nh2[2];

		nh0[0] = word32(nhA0);
		nhA1 += (nhA0 >> 32);
		nh1[0] = word32(nhA1);
		nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

		if (T_128BitTag)
		{
			nh0[1] = word32(nhB0);
			nhB1 += (nhB0 >> 32);
			nh1[1] = word32(nhB1);
			nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
		}

		#define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
		#define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum()))	// workaround for GCC 3.2
		#define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
		#define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
		#define aHi ((polyS+i*4)[0])
		#define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
		#define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
		#define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
		#define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
		#define kHi ((polyS+i*4+2)[0])

		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				for (i=0; i<=(size_t)T_128BitTag; i++)
				{
					word64 t = (word64)nh0[i] + k0;
					a0 = (word32)t;
					t = (t >> 32) + nh1[i] + k1;
					a1 = (word32)t;
					aHi = (t >> 32) + nh2[i] + kHi;
				}
				continue;
			}
		}
		for (i=0; i<=(size_t)T_128BitTag; i++)
		{
			word64 p, t;
			word32 t2;

			p = MUL32(a3, 2*k3);
			p += nh2[i];
			p += MUL32(a0, k2);
			p += MUL32(a1, k1);
			p += MUL32(a2, k0);
			t2 = (word32)p;
			p >>= 32;
			p += MUL32(a0, k3);
			p += MUL32(a1, k2);
			p += MUL32(a2, k1);
			p += MUL32(a3, k0);
			t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
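			// Reduction modulo 2^127-1: bit 127 wraps around with weight 1 (the 0x7fffffff mask
			// above and the shift by 31 below), while bits 128 and up wrap with weight 2, which
			// is why the high-order partial products use the 2*k terms.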
			p >>= 31;
			p += nh0[i];
			p += MUL32(a0, k0);
			p += MUL32(a1, 2*k3);
			p += MUL32(a2, 2*k2);
			p += MUL32(a3, 2*k1);
			t2 = (word32)p;
			p >>= 32;
			p += nh1[i];
			p += MUL32(a0, k1);
			p += MUL32(a1, k0);
			p += MUL32(a2, 2*k3);
			p += MUL32(a3, 2*k2);
			a0 = t2;
			a1 = (word32)p;
			aHi = (p >> 32) + t;
		}

		#undef a0
		#undef a1
		#undef a2
		#undef a3
		#undef aHi
		#undef k0
		#undef k1
		#undef k2
		#undef k3
		#undef kHi
#else	// #if VMAC_BOOL_32BIT
		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
#if VMAC_BOOL_WORD128
				#define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)

				first_poly_step(a1, kh1, kl1, nhA);
				if (T_128BitTag)
					first_poly_step(a2, kh2, kl2, nhB);
#else
				#define first_poly_step(ah, al, kh, kl, mh, ml) {\
					mh &= m62;\
					ADD128(mh, ml, kh, kl); \
					ah = mh; al = ml;}

				first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
				if (T_128BitTag)
					first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
#endif
				continue;
			}
			else
			{
#if VMAC_BOOL_WORD128
				a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
#else
				ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
#endif
				if (T_128BitTag)
				{
#if VMAC_BOOL_WORD128
					a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
#else
					ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
#endif
				}
			}
		}

#if VMAC_BOOL_WORD128
		#define poly_step(a, kh, kl, m) \
		{	word128 t1, t2, t3, t4;\
			Multiply128(t2, a>>64, kl);\
			Multiply128(t3, a, kh);\
			Multiply128(t1, a, kl);\
			Multiply128(t4, a>>64, 2*kh);\
			t2 += t3;\
			t4 += t1;\
			t2 += t4>>64;\
			a = (word128(word64(t2)&m63) << 64) | word64(t4);\
			t2 *= 2;\
			a += m & m126;\
			a += t2>>64;}

		poly_step(a1, kh1, kl1, nhA);
		if (T_128BitTag)
			poly_step(a2, kh2, kl2, nhB);
#else
		#define poly_step(ah, al, kh, kl, mh, ml) \
		{	word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
			/* compute ab*cd, put bd into result registers */ \
			MUL64(t2h,t2l,ah,kl); \
			MUL64(t3h,t3l,al,kh); \
			MUL64(t1h,t1l,ah,2*kh); \
			MUL64(ah,al,al,kl); \
			/* add together ad + bc */ \
			ADD128(t2h,t2l,t3h,t3l); \
			/* add 2 * ac to result */ \
			ADD128(ah,al,t1h,t1l); \
			/* now (ah,al), (t2l,2*t2h) need summing */ \
			/* first add the high registers, carrying into t2h */ \
			ADD128(t2h,ah,z,t2l); \
			/* double t2h and add top bit of ah */ \
			t2h += t2h + (ah >> 63); \
			ah &= m63; \
			/* now add the low registers */ \
			mh &= m62; \
			ADD128(ah,al,mh,ml); \
			ADD128(ah,al,z,t2h); \
		}

		poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
		if (T_128BitTag)
			poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
#endif
#endif	// #if VMAC_BOOL_32BIT
	} while (blocksRemainingInWord64);

#if VMAC_BOOL_WORD128
	(polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
	if (T_128BitTag)
	{
		(polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
	}
#elif !VMAC_BOOL_32BIT
	(polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
	if (T_128BitTag)
	{
		(polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
	}
#endif
}
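// VHASH_Update dispatches to the SSE2 assembly above on x86 when it is available at run time;
// otherwise the portable template is instantiated for 64-bit or 128-bit tags.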
inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
	if (HasSSE2())
	{
		VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
		if (m_is128)
			VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
		m_isFirstBlock = false;
	}
	else
#endif
	{
#if defined(_MSC_VER) && _MSC_VER < 1300
		VHASH_Update_Template(data, blocksRemainingInWord64);
#else
		if (m_is128)
			VHASH_Update_Template<true>(data, blocksRemainingInWord64);
		else
			VHASH_Update_Template<false>(data, blocksRemainingInWord64);
#endif
	}
}

size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
{
	size_t remaining = ModPowerOf2(length, m_L1KeyLength);
	VHASH_Update(data, (length-remaining)/8);
	return remaining;
}

static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
	word64 rh, rl, t, z=0;
	word64 p1 = input[0], p2 = input[1];
	word64 k1 = l3Key[0], k2 = l3Key[1];

	/* fully reduce (p1,p2)+(len,0) mod p127 */
	t = p1 >> 63;
	p1 &= m63;
	ADD128(p1, p2, len, t);
	/* At this point, (p1,p2) is at most 2^127+(len<<64) */
	t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
	ADD128(p1, p2, z, t);
	p1 &= m63;

	/* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
	t = p1 + (p2 >> 32);
	t += (t >> 32);
	t += (word32)t > 0xfffffffeU;
	p1 += (t >> 32);
	p2 += (p1 << 32);

	/* compute (p1+k1)%p64 and (p2+k2)%p64 */
	p1 += k1;
	p1 += (0 - (p1 < k1)) & 257;
	p2 += k2;
	p2 += (0 - (p2 < k2)) & 257;

	/* compute (p1+k1)*(p2+k2)%p64 */
	MUL64(rh, rl, p1, p2);
	t = rh >> 56;
	ADD128(t, rl, z, rh);
	rh <<= 8;
	ADD128(t, rl, z, rh);
	t += t << 8;
	rl += t;
	rl += (0 - (rl < t)) & 257;
	rl += (0 - (rl > p64-1)) & 257;
	return rl;
}

void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
{
	size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);

	if (len)
	{
		memset(m_data()+len, 0, (0-len)%16);
		VHASH_Update(DataBuf(), ((len+15)/16)*2);
		len *= 8;	// convert to bits
	}
	else if (m_isFirstBlock)
	{
		// special case for empty string
		m_polyState()[0] = m_polyState()[2];
		m_polyState()[1] = m_polyState()[3];
		if (m_is128)
		{
			m_polyState()[4] = m_polyState()[6];
			m_polyState()[5] = m_polyState()[7];
		}
	}

	if (m_is128)
	{
		word64 t[2];
		t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
		t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
		if (size == 16)
		{
			PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
			PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
		}
		else
		{
			t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
			t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
			memcpy(mac, t, size);
		}
	}
	else
	{
		word64 t = L3Hash(m_polyState(), m_l3Key(), len);
		t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
		if (size == 8)
			PutWord(false, BIG_ENDIAN_ORDER, mac, t);
		else
		{
			t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
			memcpy(mac, &t, size);
		}
	}
}

NAMESPACE_END