GNU Radio 3.4.0 C++ API
|
#ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H
#define INCLUDED_volk_16ic_magnitude_16i_a16_H

#include <inttypes.h>
#include <stdio.h>
#include <math.h>

#if LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector

  The input is read as interleaved int16_t pairs (real, then imaginary). Each
  component is scaled by 1/32768, the magnitude sqrt(I^2 + Q^2) is computed in
  single precision, scaled back by 32768 and truncated toward zero.

  The magnitude of a full-scale sample does not fit in an int16_t (for example
  (32767, 32767) has a magnitude of about 46340), so results are saturated at
  32767 instead of being converted out of range.

  \param complexVector The vector containing the complex input values
  \param magnitudeVector The vector containing the real output values
  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
*/
static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;

  __m128 vScalar = _mm_set_ps1(32768.0);
  __m128 invScalar = _mm_set_ps1(1.0/32768.0);
  // Largest value an int16_t can hold; used to saturate before the float -> int16_t cast
  __m128 vMax = _mm_set_ps1(32767.0f);

  __m128 cplxValue1, cplxValue2, result;

  // _mm_load_ps / _mm_store_ps need 16 byte alignment; the requested alignment over-satisfies that
  float inputFloatBuffer[8] __attribute__((aligned(128)));
  float outputFloatBuffer[4] __attribute__((aligned(128)));

  // Main loop: four complex points (eight int16_t values) per iteration
  for(;number < quarterPoints; number++){

    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);

    inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
    inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
    inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
    inputFloatBuffer[7] = (float)(complexVectorPtr[7]);

    cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
    cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);

    complexVectorPtr += 8;

    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);

    cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
    cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values

    result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values

    result = _mm_sqrt_ps(result); // Square root the values

    result = _mm_mul_ps(result, vScalar); // Scale the results

    result = _mm_min_ps(result, vMax); // Saturate so the int16_t conversion below stays in range

    _mm_store_ps(outputFloatBuffer, result);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
  }

  // Scalar tail: the remaining (num_points % 4) points
  number = quarterPoints * 4;
  magnitudeVectorPtr = &magnitudeVector[number];
  complexVectorPtr = (const int16_t*)&complexVector[number];
  for(; number < num_points; number++){
    const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
    const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
    const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
    // Saturate: converting an out-of-range float to int16_t is undefined
    *magnitudeVectorPtr++ = (int16_t)((val1Result > 32767.0f) ? 32767.0f : val1Result);
  }
}
#endif /* LV_HAVE_SSE3 */

#if LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector

  Same computation as the SSE3 variant, but the I and Q components are
  separated with shuffles and summed with a vertical add instead of a
  horizontal add. Results are truncated toward zero and saturated at 32767.

  \param complexVector The vector containing the complex input values
  \param magnitudeVector The vector containing the real output values
  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
*/
static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;

  __m128 vScalar = _mm_set_ps1(32768.0);
  __m128 invScalar = _mm_set_ps1(1.0/32768.0);
  // Largest value an int16_t can hold; used to saturate before the float -> int16_t cast
  __m128 vMax = _mm_set_ps1(32767.0f);

  __m128 cplxValue1, cplxValue2, iValue, qValue, result;

  // _mm_load_ps / _mm_store_ps need 16 byte alignment; the requested alignment over-satisfies that
  float inputFloatBuffer[4] __attribute__((aligned(128)));
  float outputFloatBuffer[4] __attribute__((aligned(128)));

  // Main loop: four complex points per iteration, loaded as two groups of two points
  for(;number < quarterPoints; number++){

    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);

    cplxValue1 = _mm_load_ps(inputFloatBuffer);
    complexVectorPtr += 4;

    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);

    cplxValue2 = _mm_load_ps(inputFloatBuffer);
    complexVectorPtr += 4;

    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);

    // Arrange in i1i2i3i4 format
    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
    // Arrange in q1q2q3q4 format
    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));

    iValue = _mm_mul_ps(iValue, iValue); // Square the I values
    qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values

    result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values

    result = _mm_sqrt_ps(result); // Square root the values

    result = _mm_mul_ps(result, vScalar); // Scale the results

    result = _mm_min_ps(result, vMax); // Saturate so the int16_t conversion below stays in range

    _mm_store_ps(outputFloatBuffer, result);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
  }

  // Scalar tail: the remaining (num_points % 4) points
  number = quarterPoints * 4;
  magnitudeVectorPtr = &magnitudeVector[number];
  complexVectorPtr = (const int16_t*)&complexVector[number];
  for(; number < num_points; number++){
    const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
    const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
    const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
    // Saturate: converting an out-of-range float to int16_t is undefined
    *magnitudeVectorPtr++ = (int16_t)((val1Result > 32767.0f) ? 32767.0f : val1Result);
  }
}
#endif /* LV_HAVE_SSE */

#if LV_HAVE_GENERIC
/*!
  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector

  Portable scalar implementation. Each component is divided by 32768, the
  magnitude is computed in single precision, multiplied by 32768, saturated at
  32767 and truncated toward zero.

  \param complexVector The vector containing the complex input values
  \param magnitudeVector The vector containing the real output values
  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
*/
static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;
  unsigned int number = 0;
  const float scalar = 32768.0;
  const float maxMagnitude = 32767.0f; // Largest value an int16_t can hold
  for(number = 0; number < num_points; number++){
    float real = ((float)(*complexVectorPtr++)) / scalar;
    float imag = ((float)(*complexVectorPtr++)) / scalar;
    float magnitude = sqrtf((real*real) + (imag*imag)) * scalar;
    // Saturate: converting an out-of-range float to int16_t is undefined
    *magnitudeVectorPtr++ = (int16_t)((magnitude > maxMagnitude) ? maxMagnitude : magnitude);
  }
}
#endif /* LV_HAVE_GENERIC */

#if LV_HAVE_ORC_DISABLED
/*!
  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector

  Thin wrapper that forwards to the externally defined ORC implementation with
  a scalar of 32768.
  NOTE(review): rounding and overflow handling are determined by
  volk_16ic_magnitude_16i_a16_orc_impl, which is not visible here - confirm it
  matches the other variants before enabling this path.

  \param complexVector The vector containing the complex input values
  \param magnitudeVector The vector containing the real output values
  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
*/
extern void volk_16ic_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
static inline void volk_16ic_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
  volk_16ic_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
}
#endif /* LV_HAVE_ORC_DISABLED */


#endif /* INCLUDED_volk_16ic_magnitude_16i_a16_H */