GNU Radio 3.4.0 C++ API
volk_8ic_s32f_deinterleave_32f_x2_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
00002 #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #if LV_HAVE_SSE4_1
00008 #include <smmintrin.h>
00009 /*!
00010   \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data
00011   \param complexVector The complex input vector
00012   \param iBuffer The I buffer output data
00013   \param qBuffer The Q buffer output data
00014   \param scalar The scaling value being multiplied against each data point
00015   \param num_points The number of complex data values to be deinterleaved
00016 */
00017 static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
00018   float* iBufferPtr = iBuffer;
00019   float* qBufferPtr = qBuffer;
00020 
00021   unsigned int number = 0;
00022   const unsigned int eighthPoints = num_points / 8;    
00023   __m128 iFloatValue, qFloatValue;
00024 
00025   const float iScalar= 1.0 / scalar;
00026   __m128 invScalar = _mm_set_ps1(iScalar);
00027   __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
00028   int8_t* complexVectorPtr = (int8_t*)complexVector;
00029 
00030   __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
00031   __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
00032 
00033   for(;number < eighthPoints; number++){
00034     complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
00035     iComplexVal = _mm_shuffle_epi8(complexVal, iMoveMask);
00036     qComplexVal = _mm_shuffle_epi8(complexVal, qMoveMask);
00037 
00038     iIntVal = _mm_cvtepi8_epi32(iComplexVal);
00039     iFloatValue = _mm_cvtepi32_ps(iIntVal);
00040     iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
00041     _mm_store_ps(iBufferPtr, iFloatValue);
00042     iBufferPtr += 4;
00043 
00044     iComplexVal = _mm_srli_si128(iComplexVal, 4);
00045 
00046     iIntVal = _mm_cvtepi8_epi32(iComplexVal);
00047     iFloatValue = _mm_cvtepi32_ps(iIntVal);
00048     iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
00049     _mm_store_ps(iBufferPtr, iFloatValue);
00050     iBufferPtr += 4;
00051 
00052     qIntVal = _mm_cvtepi8_epi32(qComplexVal);
00053     qFloatValue = _mm_cvtepi32_ps(qIntVal);
00054     qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
00055     _mm_store_ps(qBufferPtr, qFloatValue);
00056     qBufferPtr += 4;
00057 
00058     qComplexVal = _mm_srli_si128(qComplexVal, 4);
00059 
00060     qIntVal = _mm_cvtepi8_epi32(qComplexVal);
00061     qFloatValue = _mm_cvtepi32_ps(qIntVal);
00062     qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
00063     _mm_store_ps(qBufferPtr, qFloatValue);
00064 
00065     qBufferPtr += 4;
00066   }
00067 
00068   number = eighthPoints * 8;
00069   for(; number < num_points; number++){
00070     *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
00071     *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
00072   }
00073     
00074 }
00075 #endif /* LV_HAVE_SSE4_1 */
00076 
00077 #if LV_HAVE_SSE
00078 #include <xmmintrin.h>
00079 /*!
00080   \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data
00081   \param complexVector The complex input vector
00082   \param iBuffer The I buffer output data
00083   \param qBuffer The Q buffer output data
00084   \param scalar The scaling value being multiplied against each data point
00085   \param num_points The number of complex data values to be deinterleaved
00086 */
00087 static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
00088   float* iBufferPtr = iBuffer;
00089   float* qBufferPtr = qBuffer;
00090 
00091   unsigned int number = 0;
00092   const unsigned int quarterPoints = num_points / 4;    
00093   __m128 cplxValue1, cplxValue2, iValue, qValue;
00094 
00095   __m128 invScalar = _mm_set_ps1(1.0/scalar);
00096   int8_t* complexVectorPtr = (int8_t*)complexVector;
00097 
00098   float floatBuffer[8] __attribute__((aligned(128)));
00099 
00100   for(;number < quarterPoints; number++){
00101     floatBuffer[0] = (float)(complexVectorPtr[0]);
00102     floatBuffer[1] = (float)(complexVectorPtr[1]);
00103     floatBuffer[2] = (float)(complexVectorPtr[2]);
00104     floatBuffer[3] = (float)(complexVectorPtr[3]);
00105       
00106     floatBuffer[4] = (float)(complexVectorPtr[4]);
00107     floatBuffer[5] = (float)(complexVectorPtr[5]);
00108     floatBuffer[6] = (float)(complexVectorPtr[6]);
00109     floatBuffer[7] = (float)(complexVectorPtr[7]);
00110 
00111     cplxValue1 = _mm_load_ps(&floatBuffer[0]);
00112     cplxValue2 = _mm_load_ps(&floatBuffer[4]);
00113 
00114     complexVectorPtr += 8;
00115 
00116     cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
00117     cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
00118 
00119     // Arrange in i1i2i3i4 format
00120     iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
00121     qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
00122 
00123     _mm_store_ps(iBufferPtr, iValue);
00124     _mm_store_ps(qBufferPtr, qValue);
00125 
00126     iBufferPtr += 4;
00127     qBufferPtr += 4;
00128   }
00129 
00130   number = quarterPoints * 4;
00131   complexVectorPtr = (int8_t*)&complexVector[number];
00132   for(; number < num_points; number++){
00133     *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
00134     *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
00135   }
00136 }
00137 #endif /* LV_HAVE_SSE */
00138 
00139 #if LV_HAVE_GENERIC
00140 /*!
00141   \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data
00142   \param complexVector The complex input vector
00143   \param iBuffer The I buffer output data
00144   \param qBuffer The Q buffer output data
00145   \param scalar The scaling value being multiplied against each data point
00146   \param num_points The number of complex data values to be deinterleaved
00147 */
00148 static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
00149   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
00150   float* iBufferPtr = iBuffer;
00151   float* qBufferPtr = qBuffer;
00152   unsigned int number;
00153   const float invScalar = 1.0 / scalar;
00154   for(number = 0; number < num_points; number++){
00155     *iBufferPtr++ = (float)(*complexVectorPtr++)*invScalar;
00156     *qBufferPtr++ = (float)(*complexVectorPtr++)*invScalar;
00157   }
00158 }
00159 #endif /* LV_HAVE_GENERIC */
00160 
00161 
00162 
00163 
00164 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */