/* GNU Radio 3.4.0 C++ API */
#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H

#include <inttypes.h>
#include <stdio.h>

/*
 * NOTE(review): lv_8sc_t is not declared in this header; it is presumably
 * provided by a project header included before this one -- confirm.
 * Every kernel below reads the input as a flat sequence of int8_t values
 * laid out as I0, Q0, I1, Q1, ...
 */

#if LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
  \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data
  \param iBuffer The I buffer output data
  \param qBuffer The Q buffer output data
  \param complexVector The complex input vector
  \param scalar The scaling value each data point is divided by (implemented as a multiply by 1/scalar)
  \param num_points The number of complex data values to be deinterleaved

  Processes 8 complex points per vector iteration using aligned loads and
  stores (_mm_load_si128 / _mm_store_ps), so all three buffers must be
  16-byte aligned whenever num_points >= 8. Any remaining points are
  handled by a scalar tail loop.
*/
static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;

  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;
  __m128 iFloatValue, qFloatValue;

  const float iScalar = 1.0f / scalar;
  const __m128 invScalar = _mm_set_ps1(iScalar);
  __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;

  /* Shuffle controls: gather the even (I) or odd (Q) bytes into the low
     8 bytes of the register. A control byte with its high bit set (0x80)
     zeroes the corresponding destination byte. */
  const __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
  const __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);

  for(; number < eighthPoints; number++){
    /* 16 input bytes = 8 complex points */
    complexVal = _mm_load_si128((const __m128i*)complexVectorPtr);
    complexVectorPtr += 16;
    iComplexVal = _mm_shuffle_epi8(complexVal, iMoveMask);
    qComplexVal = _mm_shuffle_epi8(complexVal, qMoveMask);

    /* I values 0..3: sign-extend the low 4 bytes, convert, scale, store */
    iIntVal = _mm_cvtepi8_epi32(iComplexVal);
    iFloatValue = _mm_cvtepi32_ps(iIntVal);
    iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
    _mm_store_ps(iBufferPtr, iFloatValue);
    iBufferPtr += 4;

    /* I values 4..7: shift the next 4 bytes down into the low lanes */
    iComplexVal = _mm_srli_si128(iComplexVal, 4);

    iIntVal = _mm_cvtepi8_epi32(iComplexVal);
    iFloatValue = _mm_cvtepi32_ps(iIntVal);
    iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
    _mm_store_ps(iBufferPtr, iFloatValue);
    iBufferPtr += 4;

    /* Q values 0..3 */
    qIntVal = _mm_cvtepi8_epi32(qComplexVal);
    qFloatValue = _mm_cvtepi32_ps(qIntVal);
    qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
    _mm_store_ps(qBufferPtr, qFloatValue);
    qBufferPtr += 4;

    /* Q values 4..7 */
    qComplexVal = _mm_srli_si128(qComplexVal, 4);

    qIntVal = _mm_cvtepi8_epi32(qComplexVal);
    qFloatValue = _mm_cvtepi32_ps(qIntVal);
    qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
    _mm_store_ps(qBufferPtr, qFloatValue);
    qBufferPtr += 4;
  }

  /* Scalar tail: the running pointers already sit just past the last
     point handled by the vector loop. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
  }
}
#endif /* LV_HAVE_SSE4_1 */

#if LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data
  \param iBuffer The I buffer output data
  \param qBuffer The Q buffer output data
  \param complexVector The complex input vector
  \param scalar The scaling value each data point is divided by (implemented as a multiply by 1/scalar)
  \param num_points The number of complex data values to be deinterleaved

  Processes 4 complex points per vector iteration. The input bytes are
  converted to float one at a time through a small aligned scratch buffer;
  the outputs are written with aligned stores (_mm_store_ps), so iBuffer
  and qBuffer must be 16-byte aligned whenever num_points >= 4. Any
  remaining points are handled by a scalar tail loop.
*/
static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;

  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;
  __m128 cplxValue1, cplxValue2, iValue, qValue;

  /* One reciprocal shared by the vector loop and the tail loop so that
     every output element is scaled by exactly the same operation. */
  const float iScalar = 1.0f / scalar;
  const __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;

  /* Scratch buffer for the int8 -> float conversion; _mm_load_ps needs
     16-byte alignment. */
  float floatBuffer[8] __attribute__((aligned(16)));

  for(; number < quarterPoints; number++){
    floatBuffer[0] = (float)(complexVectorPtr[0]);
    floatBuffer[1] = (float)(complexVectorPtr[1]);
    floatBuffer[2] = (float)(complexVectorPtr[2]);
    floatBuffer[3] = (float)(complexVectorPtr[3]);

    floatBuffer[4] = (float)(complexVectorPtr[4]);
    floatBuffer[5] = (float)(complexVectorPtr[5]);
    floatBuffer[6] = (float)(complexVectorPtr[6]);
    floatBuffer[7] = (float)(complexVectorPtr[7]);

    cplxValue1 = _mm_load_ps(&floatBuffer[0]);
    cplxValue2 = _mm_load_ps(&floatBuffer[4]);

    /* 8 input bytes = 4 complex points */
    complexVectorPtr += 8;

    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);

    // Arrange in i1i2i3i4 format
    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
    // Arrange in q1q2q3q4 format
    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));

    _mm_store_ps(iBufferPtr, iValue);
    _mm_store_ps(qBufferPtr, qValue);

    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  /* Scalar tail: the running input pointer already sits just past the
     last point handled by the vector loop. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
  }
}
#endif /* LV_HAVE_SSE */

#if LV_HAVE_GENERIC
/*!
  \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data
  \param iBuffer The I buffer output data
  \param qBuffer The Q buffer output data
  \param complexVector The complex input vector
  \param scalar The scaling value each data point is divided by (implemented as a multiply by 1/scalar)
  \param num_points The number of complex data values to be deinterleaved
*/
static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;
  unsigned int number;
  const float invScalar = 1.0f / scalar;
  for(number = 0; number < num_points; number++){
    *iBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
  }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */