GNU Radio 3.4.0 C++ API
volk_32f_s32f_stddev_32f_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H
00002 #define INCLUDED_volk_32f_s32f_stddev_32f_a16_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <math.h>
00007 
00008 #if LV_HAVE_SSE4_1
00009 #include <smmintrin.h>
00010 /*!
00011   \brief Calculates the standard deviation of the input buffer using the supplied mean
00012   \param stddev The calculated standard deviation
00013   \param inputBuffer The buffer of points to calculate the std deviation for
00014   \param mean The mean of the input buffer
00015   \param num_points The number of values in input buffer to used in the stddev calculation
00016 */
00017 static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
00018   float returnValue = 0;
00019   if(num_points > 0){
00020     unsigned int number = 0;
00021     const unsigned int sixteenthPoints = num_points / 16;
00022 
00023     const float* aPtr = inputBuffer;
00024 
00025     float squareBuffer[4] __attribute__((aligned(128)));
00026 
00027     __m128 squareAccumulator = _mm_setzero_ps();
00028     __m128 aVal1, aVal2, aVal3, aVal4;
00029     __m128 cVal1, cVal2, cVal3, cVal4;
00030     for(;number < sixteenthPoints; number++) {
00031       aVal1 = _mm_load_ps(aPtr); aPtr += 4;    
00032       cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
00033 
00034       aVal2 = _mm_load_ps(aPtr); aPtr += 4;
00035       cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
00036 
00037       aVal3 = _mm_load_ps(aPtr); aPtr += 4;
00038       cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
00039 
00040       aVal4 = _mm_load_ps(aPtr); aPtr += 4;
00041       cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
00042 
00043       cVal1 = _mm_or_ps(cVal1, cVal2);
00044       cVal3 = _mm_or_ps(cVal3, cVal4);
00045       cVal1 = _mm_or_ps(cVal1, cVal3);
00046 
00047       squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
00048     }
00049     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
00050     returnValue = squareBuffer[0];
00051     returnValue += squareBuffer[1];
00052     returnValue += squareBuffer[2];
00053     returnValue += squareBuffer[3];
00054   
00055     number = sixteenthPoints * 16;
00056     for(;number < num_points; number++){
00057       returnValue += (*aPtr) * (*aPtr);
00058       aPtr++;
00059     }
00060     returnValue /= num_points;
00061     returnValue -= (mean * mean);
00062     returnValue = sqrt(returnValue);
00063   }
00064   *stddev = returnValue;
00065 }
00066 #endif /* LV_HAVE_SSE4_1 */
00067 
00068 #if LV_HAVE_SSE
00069 #include <xmmintrin.h>
00070 /*!
00071   \brief Calculates the standard deviation of the input buffer using the supplied mean
00072   \param stddev The calculated standard deviation
00073   \param inputBuffer The buffer of points to calculate the std deviation for
00074   \param mean The mean of the input buffer
00075   \param num_points The number of values in input buffer to used in the stddev calculation
00076 */
00077 static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
00078   float returnValue = 0;
00079   if(num_points > 0){
00080     unsigned int number = 0;
00081     const unsigned int quarterPoints = num_points / 4;
00082 
00083     const float* aPtr = inputBuffer;
00084 
00085     float squareBuffer[4] __attribute__((aligned(128)));
00086 
00087     __m128 squareAccumulator = _mm_setzero_ps();
00088     __m128 aVal = _mm_setzero_ps();
00089     for(;number < quarterPoints; number++) {
00090       aVal = _mm_load_ps(aPtr);                     // aVal = x
00091       aVal = _mm_mul_ps(aVal, aVal);                // squareAccumulator += x^2
00092       squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
00093       aPtr += 4;
00094     }
00095     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container  
00096     returnValue = squareBuffer[0];
00097     returnValue += squareBuffer[1];
00098     returnValue += squareBuffer[2];
00099     returnValue += squareBuffer[3];
00100   
00101     number = quarterPoints * 4;
00102     for(;number < num_points; number++){
00103       returnValue += (*aPtr) * (*aPtr);
00104       aPtr++;
00105     }
00106     returnValue /= num_points;
00107     returnValue -= (mean * mean);
00108     returnValue = sqrt(returnValue);
00109   }
00110   *stddev = returnValue;
00111 }
00112 #endif /* LV_HAVE_SSE */
00113 
00114 #if LV_HAVE_GENERIC
00115 /*!
00116   \brief Calculates the standard deviation of the input buffer using the supplied mean
00117   \param stddev The calculated standard deviation
00118   \param inputBuffer The buffer of points to calculate the std deviation for
00119   \param mean The mean of the input buffer
00120   \param num_points The number of values in input buffer to used in the stddev calculation
00121 */
00122 static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
00123   float returnValue = 0;
00124   if(num_points > 0){
00125     const float* aPtr = inputBuffer;
00126     unsigned int number = 0;
00127       
00128     for(number = 0; number < num_points; number++){
00129       returnValue += (*aPtr) * (*aPtr);
00130       aPtr++;
00131     }
00132 
00133     returnValue /= num_points;
00134     returnValue -= (mean * mean);
00135     returnValue = sqrt(returnValue);
00136   }
00137   *stddev = returnValue;
00138 }
00139 #endif /* LV_HAVE_GENERIC */
00140 
00141 
00142 
00143 
00144 #endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */