GNU Radio 3.4.0 C++ API
volk_16ic_s32f_deinterleave_real_32f_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
00002 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #if LV_HAVE_SSE4_1
00008 #include <smmintrin.h>
00009 /*!
00010   \brief Deinterleaves the complex 16 bit vector into I float vector data
00011   \param complexVector The complex input vector
00012   \param iBuffer The I buffer output data
00013   \param scalar The scaling value being multiplied against each data point
00014   \param num_points The number of complex data values to be deinterleaved
00015 */
00016 static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00017   float* iBufferPtr = iBuffer;
00018 
00019   unsigned int number = 0;
00020   const unsigned int quarterPoints = num_points / 4;    
00021 
00022   __m128 iFloatValue;
00023 
00024   const float iScalar= 1.0 / scalar;
00025   __m128 invScalar = _mm_set_ps1(iScalar);
00026   __m128i complexVal, iIntVal;
00027   int8_t* complexVectorPtr = (int8_t*)complexVector;
00028 
00029   __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
00030 
00031   for(;number < quarterPoints; number++){
00032     complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
00033     complexVal = _mm_shuffle_epi8(complexVal, moveMask);
00034 
00035     iIntVal = _mm_cvtepi16_epi32(complexVal);
00036     iFloatValue = _mm_cvtepi32_ps(iIntVal);
00037 
00038     iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
00039 
00040     _mm_store_ps(iBufferPtr, iFloatValue);
00041 
00042     iBufferPtr += 4;
00043   }
00044 
00045   number = quarterPoints * 4;
00046   int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
00047   for(; number < num_points; number++){
00048     *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
00049     sixteenTComplexVectorPtr++;
00050   }
00051     
00052 }
00053 #endif /* LV_HAVE_SSE4_1 */
00054 
00055 #if LV_HAVE_SSE
00056 #include <xmmintrin.h>
00057 /*!
00058   \brief Deinterleaves the complex 16 bit vector into I float vector data
00059   \param complexVector The complex input vector
00060   \param iBuffer The I buffer output data
00061   \param scalar The scaling value being multiplied against each data point
00062   \param num_points The number of complex data values to be deinterleaved
00063 */
00064 static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00065   float* iBufferPtr = iBuffer;
00066 
00067   unsigned int number = 0;
00068   const unsigned int quarterPoints = num_points / 4;    
00069   __m128 iValue;
00070 
00071   const float iScalar = 1.0/scalar;
00072   __m128 invScalar = _mm_set_ps1(iScalar);
00073   int16_t* complexVectorPtr = (int16_t*)complexVector;
00074 
00075   float floatBuffer[4] __attribute__((aligned(128)));
00076 
00077   for(;number < quarterPoints; number++){
00078     floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00079     floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2; 
00080     floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00081     floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00082 
00083     iValue = _mm_load_ps(floatBuffer);
00084 
00085     iValue = _mm_mul_ps(iValue, invScalar);
00086 
00087     _mm_store_ps(iBufferPtr, iValue);
00088 
00089     iBufferPtr += 4;
00090   }
00091 
00092   number = quarterPoints * 4;
00093   complexVectorPtr = (int16_t*)&complexVector[number];
00094   for(; number < num_points; number++){
00095     *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
00096     complexVectorPtr++;
00097   }
00098     
00099 }
00100 #endif /* LV_HAVE_SSE */
00101 
00102 #if LV_HAVE_GENERIC
00103 /*!
00104   \brief Deinterleaves the complex 16 bit vector into I float vector data
00105   \param complexVector The complex input vector
00106   \param iBuffer The I buffer output data
00107   \param scalar The scaling value being multiplied against each data point
00108   \param num_points The number of complex data values to be deinterleaved
00109 */
00110 static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00111   unsigned int number = 0;
00112   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
00113   float* iBufferPtr = iBuffer;
00114   const float invScalar = 1.0 / scalar;
00115   for(number = 0; number < num_points; number++){
00116     *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
00117     complexVectorPtr++;
00118   }
00119 }
00120 #endif /* LV_HAVE_GENERIC */
00121 
00122 
00123 
00124 
00125 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H */