GNU Radio 3.4.0 C++ API
volk_32f_x2_s32f_interleave_16ic_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
00002 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #if LV_HAVE_SSE2
00008 #include <emmintrin.h>
00009   /*!
00010     \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.
00011     \param iBuffer The I buffer data to be interleaved
00012     \param qBuffer The Q buffer data to be interleaved
00013     \param complexVector The complex output vector
00014     \param scalar The scaling value being multiplied against each data point
00015     \param num_points The number of complex data values to be interleaved
00016   */
00017 static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
00018     unsigned int number = 0;
00019     const float* iBufferPtr = iBuffer;
00020     const float* qBufferPtr = qBuffer;
00021 
00022     __m128 vScalar = _mm_set_ps1(scalar);
00023 
00024     const unsigned int quarterPoints = num_points / 4;
00025     
00026     __m128 iValue, qValue, cplxValue1, cplxValue2;
00027     __m128i intValue1, intValue2;
00028 
00029     int16_t* complexVectorPtr = (int16_t*)complexVector;
00030 
00031     for(;number < quarterPoints; number++){
00032       iValue = _mm_load_ps(iBufferPtr);
00033       qValue = _mm_load_ps(qBufferPtr);
00034 
00035       // Interleaves the lower two values in the i and q variables into one buffer
00036       cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
00037       cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
00038 
00039       // Interleaves the upper two values in the i and q variables into one buffer
00040       cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
00041       cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
00042 
00043       intValue1 = _mm_cvtps_epi32(cplxValue1);
00044       intValue2 = _mm_cvtps_epi32(cplxValue2);
00045 
00046       intValue1 = _mm_packs_epi32(intValue1, intValue2);
00047 
00048       _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
00049       complexVectorPtr += 8;
00050 
00051       iBufferPtr += 4;
00052       qBufferPtr += 4;
00053     }
00054 
00055     number = quarterPoints * 4;
00056     complexVectorPtr = (int16_t*)(&complexVector[number]);
00057     for(; number < num_points; number++){
00058       *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
00059       *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
00060     }
00061     
00062 }
00063 #endif /* LV_HAVE_SSE2 */
00064 
00065 #if LV_HAVE_SSE
00066 #include <xmmintrin.h>
00067   /*!
00068     \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.
00069     \param iBuffer The I buffer data to be interleaved
00070     \param qBuffer The Q buffer data to be interleaved
00071     \param complexVector The complex output vector
00072     \param scalar The scaling value being multiplied against each data point
00073     \param num_points The number of complex data values to be interleaved
00074   */
00075 static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
00076     unsigned int number = 0;
00077     const float* iBufferPtr = iBuffer;
00078     const float* qBufferPtr = qBuffer;
00079 
00080     __m128 vScalar = _mm_set_ps1(scalar);
00081 
00082     const unsigned int quarterPoints = num_points / 4;
00083     
00084     __m128 iValue, qValue, cplxValue;
00085 
00086     int16_t* complexVectorPtr = (int16_t*)complexVector;
00087 
00088     float floatBuffer[4] __attribute__((aligned(128)));
00089 
00090     for(;number < quarterPoints; number++){
00091       iValue = _mm_load_ps(iBufferPtr);
00092       qValue = _mm_load_ps(qBufferPtr);
00093 
00094       // Interleaves the lower two values in the i and q variables into one buffer
00095       cplxValue = _mm_unpacklo_ps(iValue, qValue);
00096       cplxValue = _mm_mul_ps(cplxValue, vScalar);
00097 
00098       _mm_store_ps(floatBuffer, cplxValue);
00099 
00100       *complexVectorPtr++ = (int16_t)(floatBuffer[0]);
00101       *complexVectorPtr++ = (int16_t)(floatBuffer[1]);
00102       *complexVectorPtr++ = (int16_t)(floatBuffer[2]);
00103       *complexVectorPtr++ = (int16_t)(floatBuffer[3]);
00104 
00105       // Interleaves the upper two values in the i and q variables into one buffer
00106       cplxValue = _mm_unpackhi_ps(iValue, qValue);
00107       cplxValue = _mm_mul_ps(cplxValue, vScalar);
00108  
00109       _mm_store_ps(floatBuffer, cplxValue);
00110       
00111       *complexVectorPtr++ = (int16_t)(floatBuffer[0]);
00112       *complexVectorPtr++ = (int16_t)(floatBuffer[1]);
00113       *complexVectorPtr++ = (int16_t)(floatBuffer[2]);
00114       *complexVectorPtr++ = (int16_t)(floatBuffer[3]);
00115 
00116       iBufferPtr += 4;
00117       qBufferPtr += 4;
00118     }
00119 
00120     number = quarterPoints * 4;
00121     complexVectorPtr = (int16_t*)(&complexVector[number]);
00122     for(; number < num_points; number++){
00123       *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
00124       *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
00125     }
00126     
00127 }
00128 #endif /* LV_HAVE_SSE */
00129 
00130 #if LV_HAVE_GENERIC
00131   /*!
00132     \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.
00133     \param iBuffer The I buffer data to be interleaved
00134     \param qBuffer The Q buffer data to be interleaved
00135     \param complexVector The complex output vector
00136     \param scalar The scaling value being multiplied against each data point
00137     \param num_points The number of complex data values to be interleaved
00138   */
00139 static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
00140   int16_t* complexVectorPtr = (int16_t*)complexVector;
00141   const float* iBufferPtr = iBuffer;
00142   const float* qBufferPtr = qBuffer;
00143   unsigned int number = 0;
00144 
00145   for(number = 0; number < num_points; number++){
00146     *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
00147     *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
00148   }
00149 }
00150 #endif /* LV_HAVE_GENERIC */
00151 
00152 
00153 
00154 
00155 #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */