GNU Radio 3.4.0 C++ API
volk_8ic_x2_multiply_conjugate_16ic_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H
00002 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007 
00008 #if LV_HAVE_SSE4_1
00009 #include <smmintrin.h>
00010 /*!
00011   \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
00012   \param cVector The complex vector where the results will be stored
00013   \param aVector One of the complex vectors to be multiplied
00014   \param bVector The complex vector which will be converted to complex conjugate and multiplied
00015   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00016 */
00017 static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
00018   unsigned int number = 0;
00019   const unsigned int quarterPoints = num_points / 4;
00020 
00021   __m128i x, y, realz, imagz;
00022   lv_16sc_t* c = cVector;
00023   const lv_8sc_t* a = aVector;
00024   const lv_8sc_t* b = bVector;
00025   __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
00026   const int shuffleMask = _MM_SHUFFLE(2,3,0,1);
00027     
00028   for(;number < quarterPoints; number++){
00029     // Convert into 8 bit values into 16 bit values
00030     x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a));
00031     y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b));
00032       
00033     // Calculate the ar*cr - ai*(-ci) portions
00034     realz = _mm_madd_epi16(x,y);
00035       
00036     // Calculate the complex conjugate of the cr + ci j values
00037     y = _mm_sign_epi16(y, conjugateSign);
00038 
00039     // Shift the order of the cr and ci values
00040     y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask);
00041 
00042     // Calculate the ar*(-ci) + cr*(ai)
00043     imagz = _mm_madd_epi16(x,y);
00044 
00045     _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), _mm_unpackhi_epi32(realz, imagz)));
00046 
00047     a += 4;
00048     b += 4;
00049     c += 4;
00050   }
00051     
00052   number = quarterPoints * 4;
00053   int16_t* c16Ptr = (int16_t*)&cVector[number];
00054   int8_t* a8Ptr = (int8_t*)&aVector[number];
00055   int8_t* b8Ptr = (int8_t*)&bVector[number];
00056   for(; number < num_points; number++){
00057     float aReal =  (float)*a8Ptr++;
00058     float aImag =  (float)*a8Ptr++;
00059     lv_32fc_t aVal = lv_32fc_init(aReal, aImag );
00060     float bReal = (float)*b8Ptr++;
00061     float bImag = (float)*b8Ptr++;
00062     lv_32fc_t bVal = lv_32fc_init( bReal, -bImag );
00063     lv_32fc_t temp = aVal * bVal;
00064 
00065     *c16Ptr++ = (int16_t)lv_creal(temp);
00066     *c16Ptr++ = (int16_t)lv_cimag(temp);
00067   }
00068 }
00069 #endif /* LV_HAVE_SSE4_1 */
00070 
00071 #if LV_HAVE_GENERIC
00072 /*!
00073   \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
00074   \param cVector The complex vector where the results will be stored
00075   \param aVector One of the complex vectors to be multiplied
00076   \param bVector The complex vector which will be converted to complex conjugate and multiplied
00077   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00078 */
00079 static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
00080   unsigned int number = 0;
00081   int16_t* c16Ptr = (int16_t*)cVector;
00082   int8_t* a8Ptr = (int8_t*)aVector;
00083   int8_t* b8Ptr = (int8_t*)bVector;
00084   for(number =0; number < num_points; number++){
00085     float aReal =  (float)*a8Ptr++;
00086     float aImag =  (float)*a8Ptr++;
00087     lv_32fc_t aVal = lv_32fc_init(aReal, aImag );
00088     float bReal = (float)*b8Ptr++;
00089     float bImag = (float)*b8Ptr++;
00090     lv_32fc_t bVal = lv_32fc_init( bReal, -bImag );
00091     lv_32fc_t temp = aVal * bVal;
00092 
00093     *c16Ptr++ = (int16_t)lv_creal(temp);
00094     *c16Ptr++ = (int16_t)lv_cimag(temp);
00095   }
00096 }
00097 #endif /* LV_HAVE_GENERIC */
00098 
00099 
00100 
00101 
00102 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */